Skip to content

Commit

Permalink
#367 and #503
Browse files Browse the repository at this point in the history
  • Loading branch information
berndmoos committed Dec 10, 2024
1 parent 2005473 commit 1d4c871
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 60 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ public void writeNonSegmentedISOTEIToFile(BasicTranscription bt, String path) th
String result = sf.applyInternalStylesheetToString(EXMARaLDA2GENERIC_ISO_TEI_XSL, bt.toXML());
Document teiDoc = IOUtilities.readDocumentFromString(result);
setDocLanguage(teiDoc, language);
setTranscriptionDesc(teiDoc, "-", "-");
setTranscriptionDesc(teiDoc, "unspecified", "unspecified");
IOUtilities.writeDocumentToLocalFile(path, teiDoc);
System.out.println("[TEIConverter] Non segmented ISO TEI File written to " + path);
}
Expand Down Expand Up @@ -390,6 +390,9 @@ public BasicTranscription readISOTEIFromFile(String path) throws IOException{
}
bt.getHead().getMetaInformation().setReferencedFiles(correctedReferencedFiles);

// new 10-12-2024
String[] canonicalTierOrder = bt.getBody().makeCanonicalTierOrder(bt.getHead().getSpeakertable().getAllSpeakerIDs());
bt.getBody().reorderTiers(canonicalTierOrder);

return bt;
} catch (JDOMException | SAXException | JexmaraldaException | ParserConfigurationException | TransformerException ex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.swing.JFrame;
import javax.swing.JOptionPane;
import org.exmaralda.common.ExmaraldaApplication;
import org.exmaralda.common.dialogs.ProgressBarDialog;
import org.exmaralda.folker.utilities.PreferencesUtilities;
Expand Down
71 changes: 64 additions & 7 deletions src/org/exmaralda/tagging/TagDirectoryISOTEI.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,24 @@ public class TagDirectoryISOTEI {
//public static String[] OPT = {"-token","-lemma","-sgml","-no-unknown", "", ""};
public static String[] OPT = {"-token","-lemma","-sgml","-no-unknown"};

public TagDirectoryISOTEI() {
// XPath expression handed to the transcription wrapper in tagFile to select the tokens to tag
public String xpathToTokens;
// when true, tagFile applies the post-processing rules to the tagged output
public boolean applyPP;
// tagger instance that tagFile uses to tag each transcription
public TreeTagger tt;
// 1:1 post-processing rules that tagFile applies when applyPP is set
PostProcessingRules ppr;

/**
 * Creates a tagger from the class-level defaults: {@code TTC}, {@code PF},
 * {@code ENC} and {@code OPT} for the tagger itself,
 * {@code TreeTaggableISOTEITranscription.XPATH_NO_XY} for token selection,
 * and post-processing switched on.
 *
 * @throws IOException   propagated from the delegated constructor
 * @throws JDOMException propagated from the delegated constructor
 */
public TagDirectoryISOTEI() throws IOException, JDOMException {
    this(
            TTC,
            PF,
            ENC,
            OPT,
            TreeTaggableISOTEITranscription.XPATH_NO_XY,
            true
    );
}

public TagDirectoryISOTEI(String treeTaggerDirectory, String parameterFile, String encoding, String[] treeTaggerOptions, String xpathToTokens, boolean applyPP) throws IOException, JDOMException {
this.xpathToTokens = xpathToTokens;
this.applyPP = applyPP;

tt = new TreeTagger(treeTaggerDirectory, parameterFile, encoding, treeTaggerOptions);
tt.verbose = false;
ppr = new PostProcessingRules();
ppr.read(PostProcessingRules.FOLK_RULES);
}



Expand Down Expand Up @@ -63,13 +78,14 @@ public static void main(String[] args) {
Logger.getLogger(TagDirectoryISOTEI.class.getName()).log(Level.SEVERE, null, ex);
}
}



void doit(String[] args) throws IOException, JDOMException {
System.out.println("=================================");
String inputDir = args[0];
String outputDir = args[1];
String xpathToTokens = TreeTaggableISOTEITranscription.XPATH_NO_XY;
boolean applyPP = true;

if (args.length>2){
TTC = args[2];
PF = args[3];
Expand All @@ -84,6 +100,8 @@ void doit(String[] args) throws IOException, JDOMException {
if ("NO_DUMMIES".equals(args[6])){
xpathToTokens = TreeTaggableISOTEITranscription.XPATH_NO_DUMMIES;
}
tt = new TreeTagger(TTC, PF, ENC, OPT);
tt.verbose = false;
}
File in = new File(inputDir);
File out = new File(outputDir);
Expand All @@ -97,16 +115,14 @@ void doit(String[] args) throws IOException, JDOMException {
f.delete();
}
}
TreeTagger tt = new TreeTagger(TTC, PF, ENC, OPT);
tt.verbose = false;
File[] transcriptFiles = in.listFiles(new FilenameFilter(){
@Override
public boolean accept(File dir, String name) {
return (name.toLowerCase().endsWith(".xml"));
}
});

PostProcessingRules ppr = new PostProcessingRules();
ppr = new PostProcessingRules();
ppr.read(PostProcessingRules.FOLK_RULES);

CombinedPostProcessingRules pprCombined = new CombinedPostProcessingRules();
Expand Down Expand Up @@ -162,8 +178,49 @@ public boolean accept(File dir, String name) {
}
System.out.println("=================================");
count2++;
}
}
}

/**
 * Tags a single transcription file and writes the result to {@code output}.
 * <p>
 * Steps, in order: read {@code input}; strip every existing {@code lemma},
 * {@code pos} and {@code p-pos} attribute; write the stripped document to a
 * temporary file; run the tagger on it; integrate the tagger output back into
 * the stripped document and write that to {@code output}; finally, if
 * {@code applyPP} is set, apply the 1:1 post-processing rules to
 * {@code output} in place.
 * <p>
 * Both temporary files are removed before the method returns, whether it
 * completes normally or throws.
 *
 * @param input  the transcription file to tag
 * @param output the file the tagged (and optionally post-processed) document is written to
 * @throws JDOMException propagated from the document, XPath or tagging helpers
 * @throws IOException   propagated from file access or the tagging helpers
 */
public void tagFile(File input, File output) throws JDOMException, IOException{
    Document trDoc = FileIO.readDocumentFromLocalFile(input);

    // get rid of all existing attributes for pos and lemma
    List<?> staleAttributes = XPath.selectNodes(trDoc, "//@lemma|//@pos|//@p-pos");
    for (Object o : staleAttributes){
        ((Attribute) o).detach();
    }

    File intermediate = null;
    File tempOutput = null;
    try {
        intermediate = File.createTempFile("ISO_TEI","TMP");
        // kept as a fallback in case the explicit delete below fails
        intermediate.deleteOnExit();
        FileIO.writeDocumentToLocalFile(intermediate, trDoc);

        System.out.println("[TagDirectoryISOTEI] Tagging " + input.getName());
        TreeTaggableISOTEITranscription ttont = new TreeTaggableISOTEITranscription(intermediate, true);
        ttont.setXPathToTokens(xpathToTokens);

        tempOutput = File.createTempFile("ISO_TEI","TMP");
        System.out.println("[TagDirectoryISOTEI] " + tempOutput.getAbsolutePath() + " created.");
        tempOutput.deleteOnExit();
        tt.tag(ttont, tempOutput);
        System.out.println("[TagDirectoryISOTEI] Tagging done");

        SextantISOTEIIntegrator soi = new SextantISOTEIIntegrator(intermediate.getAbsolutePath());
        soi.integrate(tempOutput.getAbsolutePath());
        System.out.println("[TagDirectoryISOTEI] Writing " + output.getAbsolutePath());
        soi.writeDocument(output.getAbsolutePath());

        if (applyPP){
            Document doc = FileIO.readDocumentFromLocalFile(output);

            // "Ordinary" Post Processing Rules : 1:1
            int count = ppr.applyISOTEI(doc);
            System.out.println("[TagDirectoryISOTEI] Applied 1:1 post processing rules on " + count + " elements. " );

            FileIO.writeDocumentToLocalFile(output, doc);
        }
    } finally {
        // Clean up on every path: without this, a failure leaves both temp files
        // behind until the JVM exits, and the intermediate file was never removed
        // explicitly even on success.
        if (tempOutput != null){
            tempOutput.delete();
        }
        if (intermediate != null){
            intermediate.delete();
        }
    }
}

}
2 changes: 1 addition & 1 deletion src/org/exmaralda/tei/xml/exmaralda2isotei.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -338,7 +338,7 @@
</xsl:template>

<xsl:template match="event[../@type='d']">
<xsl:message>HERE I GO!</xsl:message>
<!-- <xsl:message>HERE I GO!</xsl:message> -->
<xsl:element name="incident" xmlns="http://www.tei-c.org/ns/1.0">
<xsl:if test="../@speaker">
<xsl:attribute name="who">
Expand Down
Loading

0 comments on commit 1d4c871

Please sign in to comment.