Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
berndmoos committed Nov 21, 2024
1 parent 6be0bce commit 211cb6f
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 3 deletions.
55 changes: 53 additions & 2 deletions src/org/exmaralda/common/corpusbuild/EXBBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.stream.Stream;
import org.exmaralda.common.jdomutilities.IOUtilities;
import org.exmaralda.partitureditor.jexmaralda.BasicTranscription;
Expand All @@ -24,6 +27,7 @@
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.Namespace;
import org.jdom.xpath.XPath;
import org.xml.sax.SAXException;

Expand All @@ -38,6 +42,8 @@ public class EXBBuilder {
String uniqueSpeakerDistinction = "descendant::abbreviation";
String segmentation = "default";

Set<String> deleteMetaKeys = new HashSet<>();


public EXBBuilder(String corpusName, File topDirectory, String uniqueSpeakerDistinction, String segmentation){
this.corpusName = corpusName;
Expand All @@ -46,6 +52,10 @@ public EXBBuilder(String corpusName, File topDirectory, String uniqueSpeakerDist
this.segmentation = segmentation;
}

/**
 * Sets the names of the user-defined meta information attributes that are
 * left out when the Coma document is built. Attributes whose name is in this
 * set are skipped when communication and speaker descriptions are written.
 *
 * @param deleteMetaKeys the attribute names to leave out; {@code null} is
 *                       treated as an empty set (nothing is left out)
 */
public void setDeleteMetaKeys(Set<String> deleteMetaKeys){
    // Copy defensively: later changes to the caller's set must not alter a
    // configured builder, and a null argument must not surface later as a
    // NullPointerException in deleteMetaKeys.contains(...).
    this.deleteMetaKeys = (deleteMetaKeys == null) ? new HashSet<>() : new HashSet<>(deleteMetaKeys);
}

public void build() throws IOException, SAXException, JexmaraldaException, JDOMException{
List<File> exbFiles = collectEXBFiles();
segmentEXBFiles(exbFiles);
Expand Down Expand Up @@ -80,19 +90,26 @@ private void constructComa(List<File> exbFiles) throws SAXException, JexmaraldaE
comaDocument.getRootElement().setAttribute("uniqueSpeakerDistinction", "//speaker/" + this.uniqueSpeakerDistinction);
comaDocument.getRootElement().setAttribute("Name", corpusName);
comaDocument.getRootElement().setAttribute("Id", corpusName);
comaDocument.getRootElement().setAttribute("noNamespaceSchemaLocation", "http://www.exmaralda.org/xml/comacorpus.xsd", Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance"));
Namespace xsiNamespace = Namespace.getNamespace("xsi", "http://www.w3.org/2001/XMLSchema-instance");
comaDocument.getRootElement().addNamespaceDeclaration(xsiNamespace);

Element corpusDataElement = new Element("CorpusData");
comaDocument.getRootElement().addContent(corpusDataElement);

for (File exbFile : exbFiles){
BasicTranscription exb = new BasicTranscription(exbFile.getAbsolutePath());
Element communicationElement = new Element("Communication")
.setAttribute("Name", exb.getHead().getMetaInformation().getTranscriptionName())
.setAttribute("Id", exb.getHead().getMetaInformation().getTranscriptionName());
comaDocument.getRootElement().addContent(communicationElement);
corpusDataElement.addContent(communicationElement);

// Metadata
Element communicationDescriptionElement = new Element("Description");
communicationElement.addContent(communicationDescriptionElement);
UDInformationHashtable udMetaInformation = exb.getHead().getMetaInformation().getUDMetaInformation();
for (String attribute : udMetaInformation.getAllAttributes()){
if (deleteMetaKeys.contains(attribute)) continue;
String value = udMetaInformation.getValueOfAttribute(attribute);
Element keyElement = new Element("Key")
.setAttribute("Name", attribute)
Expand Down Expand Up @@ -162,12 +179,45 @@ private void constructComa(List<File> exbFiles) throws SAXException, JexmaraldaE
transcriptionElement2.addContent(transcriptionDescriptionElement2);
transcriptionDescriptionElement2.addContent(new Element("Key").setAttribute("Name", "segmented").setText("true"));

// Recordings
/*
<Recording Id="RID7D37BDAC-9DDB-B6CD-34D3-A50A67CBA980">
<Name>AnneWill</Name>
<RecordingDuration>360003</RecordingDuration>
<Media Id="MID56777C50-5115-BD39-50AC-905E7CBC1DE9">
<Description>
<Key Name="Type">digital</Key>
</Description>
<NSLink>AnneWill/AnneWill.mp4</NSLink>
</Media>
</Recording>
*/
Vector<String> referencedFiles = exb.getHead().getMetaInformation().getReferencedFiles();
int count = 0;
for (String referencedFile : referencedFiles){
count++;
Element recordingElement = new Element("Recording")
.setAttribute("Id", exb.getHead().getMetaInformation().getTranscriptionName()+ "_REC_" + Integer.toString(count));
recordingElement.addContent(new Element("Name").setText(new File(referencedFile).getName()));
communicationElement.addContent(recordingElement);
Element mediaElement = new Element("Media")
.setAttribute("Id", exb.getHead().getMetaInformation().getTranscriptionName()+ "_MED_" + Integer.toString(count));
recordingElement.addContent(mediaElement);
Element mediaDescriptionElement = new Element("Description");
mediaElement.addContent(mediaDescriptionElement);
mediaDescriptionElement.addContent(new Element("Key").setAttribute("Name", "type").setText("digital"));

relativePath = topDirectory.toPath().relativize(new File(referencedFile).toPath());
mediaElement.addContent(new Element("NSLink").setText(relativePath.toString().replace(File.separatorChar, '/')));


}


}

for (String id : speakerElements.keySet()){
comaDocument.getRootElement().addContent(speakerElements.get(id));
corpusDataElement.addContent(speakerElements.get(id));
}

File comaOutFile = new File(topDirectory, corpusName + ".coma");
Expand Down Expand Up @@ -203,6 +253,7 @@ private Element makeSpeakerElement(String sID, Speaker s) {
speakerElement.addContent(speakerDescriptionElement);
UDInformationHashtable udMetaInformation = s.getUDSpeakerInformation();
for (String attribute : udMetaInformation.getAllAttributes()){
if (deleteMetaKeys.contains(attribute)) continue;
String value = udMetaInformation.getValueOfAttribute(attribute);
Element keyElement = new Element("Key")
.setAttribute("Name", attribute)
Expand Down
8 changes: 7 additions & 1 deletion src/org/exmaralda/common/corpusbuild/TestEXBBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.exmaralda.partitureditor.jexmaralda.JexmaraldaException;
Expand All @@ -26,8 +28,12 @@ public static void main(String[] args) {
}

private void doit() {
EXBBuilder exbBuilder = new EXBBuilder("MANV", new File("C:\\UDE\\PILOT_MANV\\ZUMULT-CORPUS\\MANV"), "descendant::ud-information[@attribute-name='uniqueID']", "default");
try {
EXBBuilder exbBuilder = new EXBBuilder("MANV", new File("C:\\UDE\\PILOT_MANV\\ZUMULT-CORPUS\\MANV"), "descendant::ud-information[@attribute-name='uniqueID']", "default");
Set<String> dmk = new HashSet<>();
dmk.add("ELAN-Media-File");
dmk.add("ELAN-Mime-Type");
exbBuilder.setDeleteMetaKeys(dmk);
exbBuilder.build();
} catch (IOException | SAXException | JexmaraldaException | JDOMException ex) {
Logger.getLogger(TestEXBBuilder.class.getName()).log(Level.SEVERE, null, ex);
Expand Down

0 comments on commit 211cb6f

Please sign in to comment.