xpath('/results/allNames/entity');

// Build a lookup table ($a) from each taxon name, as it appears in the text,
// to the taXMLit fragment that should be written after any TEI line that
// contains that name.
foreach ($got_entity as $e) {
    // Cast once so every string function below works on a plain string
    // rather than relying on implicit SimpleXMLElement conversion.
    $nameString = (string) $e->nameString;

    // Single-level names (no blank space in nameString) are ignored.
    if (strpos($nameString, ' ') === false) {
        continue;
    }

    // *** hard coded *** words that indicate a non-taxon value returned by
    // uBio which must not be used to mark up the XML. Please add to this
    // pattern as required. Current words are drawn from BoB51 ('and' occurs
    // 318 times, 'the' 142 times and 'new' 6 times).
    // The words are matched case-insensitively as WHOLE words: a plain
    // substring test would also throw away genuine names that merely contain
    // those letters (e.g. "grandifolia" contains "and").
    if (preg_match('/\b(?:and|the|new)\b/iu', $nameString) === 1) {
        continue;
    }

    // Note the testing above is not perfect - there is one example in BoB51
    // where the genus abbreviation "T." is expanded to "T[he]".
    // Convert nameString back to how it appears in the text by removing the
    // bracketed expansion and replacing it with a period.
    // mb_substr() counts characters, so the offsets must come from
    // mb_strpos() (characters) and not from a byte-oriented search.
    $start = mb_strpos($nameString, '[');
    if ($start !== false) {
        $end = mb_strpos($nameString, ']', $start);
        // Only rewrite when the expansion is properly closed; an unmatched
        // '[' leaves the name untouched instead of producing a garbled one.
        if ($end !== false) {
            $nameString = mb_substr($nameString, 0, $start) . '.' . mb_substr($nameString, $end + 1);
        }
    }

    // No wrong values present in nameString: collect the canonical version
    // of the name and its components.
    $parsedName  = $e->parsedName['canonical'];
    $genusName   = $e->parsedName->component[0];
    $speciesName = $e->parsedName->component[1];
    $namebankID  = $e->namebankID;

    // Format values to taXMLit.
    // NOTE(review): the literals between the values are empty strings in this
    // copy of the script - confirm whether taXMLit element markup is meant to
    // wrap each value.
    $new_text = ''.$nameString.''.$parsedName.''.$genusName.''.$speciesName.''.$namebankID.''."\n";

    // Store the name as key with the taXMLit text as value, for later
    // searching in the source XML.
    $a[$nameString] = $new_text;
} // end of foreach $got_entity

// $a is only assigned inside the loop above (no initialisation is visible in
// this part of the script), so make sure it exists when no name survived.
if (!isset($a)) {
    $a = array();
}

// Now process the found taxon names, adding taXMLit data into the TEI XML
// file accordingly.
// Create an array with just the taxon names.
$key_a = array_keys($a);

// Read the TEI XML one line at a time. Compare against false explicitly so a
// line that happens to be falsy (e.g. a final "0" without a newline) does not
// end the copy early.
while (($buffer = fgets($fin)) !== false) {
    // Write the TEI XML line out to the new file regardless.
    fwrite($fout, $buffer);

    // Check every taxon name against the line just processed.
    foreach ($key_a as $n) {
        if (strpos($buffer, $n) !== false) {
            // Tell the user which taxon name has been found.
            echo "Found {$n}\n";
            // Use the taxon name as key to retrieve the appropriate taXMLit
            // data and write it to the new file.
            fwrite($fout, $a[$n]);
        }
    }
}

// In-progress message.
echo "\nCompleted generating taXMLit elements\n";

// Close down the script.
$fin = fclose($fin) or exit("Unable to close input xml file {$fn_in}\n");
$fout = fclose($fout) or exit("Unable to close output xml file {$fn_out}\n");

echo "\nGoodbye from apply FindIT\n";
?>