open($fn_in, 'UTF-8') or exit("Failed to open input xml file: {$fn_in} \n");

// Header template (input) and TEI result (output). The commented-out lines are the
// equivalent locations on the alternative /host mount.
//$fn_hd = '/host/ABLE/BoB/wip/'.$volume.'_teiHeader.xsl';
$fn_hd = 'C:\ABLE\BoB\wip\\'.$volume.'_teiheader.xsl';
$fhd = fopen($fn_hd, 'r') or exit("Failed to open input xsl file: {$fn_hd} \n");

//$fn_out = '/host/ABLE/BoB/wip/'.$volume.'_abbyy_to_tei_by_xmlreader.xml';
$fn_out = 'C:\ABLE\BoB\wip\\'.$volume.'_abbyy_to_tei_by_xmlreader.xml';
$fout = fopen($fn_out, 'w') or exit("Failed to open output xml file: {$fn_out} \n");

// define TEI XML
// NOTE(review): these fragments, and the '' literals appended to $output / written to
// $fout further down, are reproduced exactly as they appear in this copy. The $hi_count
// open/close bookkeeping suggests they originally carried markup that was lost in
// transit -- confirm against the authoritative version before running.
$tei_begin = " ";
$tei_middle = "\n";
$tei_end = "\n";

// initialise variables
$hi_count = 0;      // number of highlight openings emitted for the current <formatting>
$old_fontsize = 0;  // last 'fs' attribute value seen, to detect font-size changes
$output = '';       // text accumulated for the current line
$write = false;

// main processing
echo "Writing TEI header\n";
fwrite($fout, $tei_begin);

// NOTE(review): the header loop arrived garbled (`if ('') != false)` does not parse). The
// string it searched for, and whatever it did with $buffer, are not visible in this copy.
// The loop below only restores valid syntax and keeps the visible "break when found"
// shape; $header_stop must be filled in from the authoritative version. TODO confirm.
$header_stop = '';
// Compare against false explicitly so that a falsy line (e.g. "0") does not end the loop.
while (($buffer = fgets($fhd)) !== false) {
    if ($header_stop !== '' && strpos($buffer, $header_stop) !== false) {
        break;
    }
}

echo "Writing TEI text\n";
fwrite($fout, $tei_middle);

// Stream through the input document node by node.
// Characters are taken from the node that follows each <charParams> start tag rather than
// from TEXT nodes: an earlier attempt using readString() on TEXT nodes did not retrieve
// spaces.
while ($xml->read()) {
    // Capture these first: moveToAttribute() below repositions the reader.
    $node_type = $xml->nodeType;
    $node_name = $xml->localName;

    if ($node_type === XMLReader::ELEMENT) {
        if ($node_name === 'charParams') {
            // An empty <charParams/> has no content node; reading ahead would consume
            // the following node so that it is never dispatched by this loop.
            if (!$xml->isEmptyElement) {
                $xml->read();
                $output .= htmlspecialchars($xml->value, ENT_QUOTES, 'UTF-8');
            }
        } elseif ($node_name === 'formatting') {
            // One opening per formatting attribute present; $hi_count records how many
            // closings are owed when the element ends.
            if ($xml->moveToAttribute('bold')) {
                $output .= '';
                $hi_count++;
            }
            if ($xml->moveToAttribute('italic')) {
                $output .= '';
                $hi_count++;
            }
            if ($xml->moveToAttribute('fs')) {
                $fontsize = $xml->value;
                // Only react when the font size differs from the previous one seen.
                if ($old_fontsize !== $fontsize) {
                    $output .= '';
                    $old_fontsize = $fontsize;
                    $hi_count++;
                }
            }
        } elseif ($node_name === 'page') {
            fwrite($fout, "\n");
        } elseif ($node_name === 'par') {
            fwrite($fout, '');
        }
    } elseif ($node_type === XMLReader::END_ELEMENT) {
        if ($node_name === 'formatting') {
            // Emit one closing per opening recorded for this element, then reset.
            $output .= str_repeat('', $hi_count);
            $hi_count = 0;
        } elseif ($node_name === 'line') {
            // Flush the accumulated line and start a fresh one.
            fwrite($fout, $output.'');
            $output = '';
        } elseif ($node_name === 'page') {
            fwrite($fout, "\n\n");
        } elseif ($node_name === 'par') {
            fwrite($fout, "\n");
        }
    }
}

fwrite($fout, $tei_end);

// close down script
// XMLReader::close() takes no arguments; passing one raises ArgumentCountError on PHP 8.
$xml->close() or exit("Failed to close input xml file: {$fn_in} \n");
fclose($fhd) or exit("Failed to close input xsl file: {$fn_hd} \n");
fclose($fout) or exit("Failed to close output text file: {$fn_out} \n");

echo "\nGoodbye from ABBYY to TEI by XMLReader\n";
?>