Skip to content

Commit

Permalink
Merge pull request #121 from europeana/EA-testingPageXML
Browse files Browse the repository at this point in the history
Ea testing page xml
  • Loading branch information
SrishtiSingh-eu authored Jun 15, 2023
2 parents 4625ade + 291f503 commit 1c51870
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,33 +3,37 @@
import eu.europeana.edm.media.MediaReference;
import eu.europeana.fulltext.alto.model.AltoPage;
import eu.europeana.fulltext.alto.parser.AltoParser;
import eu.europeana.fulltext.exception.XmlParsingException;
import org.xml.sax.InputSource;

import javax.xml.XMLConstants;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMResult;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamSource;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

/**
* @author Hugo
* @since 4 Apr 2023
*/
public class PageXMLParser extends AltoParser {
private static final String XSLT_PATH = "etc/PageToAlto.xsl";
private final Transformer _transformer;
private final Transformer transformer;

public PageXMLParser() throws TransformerConfigurationException {
InputStream is = ClassLoader.getSystemClassLoader()
.getResourceAsStream(XSLT_PATH);
if (is == null) {
is = this.getClass().getResourceAsStream(XSLT_PATH);
public PageXMLParser() throws TransformerConfigurationException, IOException, XmlParsingException {
URL file = PageXMLParser.class.getClassLoader().getResource(XSLT_PATH);
if (file == null) {
throw new XmlParsingException("Unable to find file " + XSLT_PATH);
}
try (InputStream is = file.openStream()) {
TransformerFactory tf = TransformerFactory.newInstance();
tf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
tf.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
transformer = tf.newTransformer(new StreamSource(is));
}
TransformerFactory tf = TransformerFactory.newInstance();
tf.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, "");
tf.setAttribute(XMLConstants.ACCESS_EXTERNAL_STYLESHEET, "");
_transformer = tf.newTransformer(new StreamSource(is));
}

public AltoPage processPage(InputSource source, MediaReference ref) {
Expand All @@ -39,7 +43,7 @@ public AltoPage processPage(InputSource source, MediaReference ref) {
public AltoPage processPage(Source source, MediaReference ref) {
try {
DOMResult result = new DOMResult();
_transformer.transform(source, result);
transformer.transform(source, result);
return super.processPage(new DOMSource(result.getNode()), ref);
} catch (TransformerException e) {
throw new RuntimeException(e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.stream.StreamSource;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;


Expand All @@ -31,9 +32,8 @@ public FullTextPackage convert(AnnotationPreview annotationPreview) throws Europ
// PageXML is first transformed into ALTO via XSLT; the resulting ALTO page is then converted to EDM
return new AltoToFulltextConverter().getAltoToEDM(altoPage, annotationPreview, reference);

} catch (TransformerConfigurationException e) {
e.printStackTrace();
throw new XmlParsingException("Error configuring the transformer for type " +annotationPreview.getFulltextType().getMimeType());
} catch (TransformerConfigurationException | IOException e) {
throw new XmlParsingException("Error configuring the transformer for type " +annotationPreview.getFulltextType());
}

}
Expand Down
16 changes: 12 additions & 4 deletions k8s/overlays/cloud/cronjob.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ spec:
spec:
ttlSecondsAfterFinished: ${CRON_TTL_AFTER_FINISHED}
template:
metadata:
annotations:
fluentd/include: '${COLLECT_LOGS}'
fluentd/multiline: 'true'
spec:
containers:
- name: fulltext-annosync
Expand Down Expand Up @@ -56,13 +60,13 @@ spec:
suspend: ${SUSPEND_INDEXING}
concurrencyPolicy: Forbid
jobTemplate:
metadata:
annotations:
fluentd/include: '${COLLECT_LOGS}'
fluentd/multiline: 'true'
spec:
ttlSecondsAfterFinished: ${CRON_TTL_AFTER_FINISHED}
template:
metadata:
annotations:
fluentd/include: '${COLLECT_LOGS}'
fluentd/multiline: 'true'
spec:
containers:
- name: fulltext-indexing
Expand Down Expand Up @@ -117,6 +121,10 @@ spec:
spec:
ttlSecondsAfterFinished: ${CRON_TTL_AFTER_FINISHED}
template:
metadata:
annotations:
fluentd/include: '${COLLECT_LOGS}'
fluentd/multiline: 'true'
spec:
containers:
- name: fulltext-indexing
Expand Down

0 comments on commit 1c51870

Please sign in to comment.