diff --git a/dkpro-core-io-text-asl/src/main/java/org/dkpro/core/io/text/TokenizedTextWriter.java b/dkpro-core-io-text-asl/src/main/java/org/dkpro/core/io/text/TokenizedTextWriter.java index 6252fb14c..cd6a869e0 100644 --- a/dkpro-core-io-text-asl/src/main/java/org/dkpro/core/io/text/TokenizedTextWriter.java +++ b/dkpro-core-io-text-asl/src/main/java/org/dkpro/core/io/text/TokenizedTextWriter.java @@ -19,8 +19,6 @@ import java.io.File; import java.io.IOException; -import java.io.OutputStream; - import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.descriptor.ConfigurationParameter; @@ -157,9 +155,7 @@ public void initialize(UimaContext context) public void process(JCas aJCas) throws AnalysisEngineProcessException { - try { - OutputStream outputStream = getOutputStream(aJCas, extension); - + try (var outputStream = getOutputStream(aJCas, extension)) { /* iterate over sentences */ for (String[] line : sequenceGenerator.tokenSequences(aJCas)) { if (line.length > 0) { diff --git a/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TextWriterTest.java b/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TextWriterTest.java index 3e2d79a7d..0ce531263 100644 --- a/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TextWriterTest.java +++ b/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TextWriterTest.java @@ -25,7 +25,6 @@ import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; -import java.io.InputStream; import java.io.PrintStream; import org.apache.commons.io.IOUtils; @@ -43,23 +42,22 @@ public class TextWriterTest { private @TempDir File outputPath; - + @Test public void testWriteWithDocumentUri() throws Exception { AnalysisEngineDescription writer = createEngineDescription(TextWriter.class, - TextWriter.PARAM_TARGET_LOCATION, outputPath, - TextWriter.PARAM_STRIP_EXTENSION, true, - TextWriter.PARAM_OVERWRITE, true); - + TextWriter.PARAM_TARGET_LOCATION, outputPath, TextWriter.PARAM_STRIP_EXTENSION, + true, TextWriter.PARAM_OVERWRITE, true); + JCas jcas = JCasFactory.createJCas(); - + DocumentMetaData dmd = DocumentMetaData.create(jcas); dmd.setDocumentBaseUri("file:/dummy"); dmd.setDocumentUri("file:/dummy/text1.txt"); - + runPipeline(jcas, writer); - + assertTrue(new File(outputPath, "text1.txt").exists()); } @@ -67,24 +65,22 @@ public void testWriteWithDocumentUri() throws Exception public void testWriteWithDocumentId() throws Exception { AnalysisEngineDescription writer = createEngineDescription(TextWriter.class, - TextWriter.PARAM_TARGET_LOCATION, outputPath, - TextWriter.PARAM_STRIP_EXTENSION, true, - TextWriter.PARAM_OVERWRITE, true); - + TextWriter.PARAM_TARGET_LOCATION, outputPath, TextWriter.PARAM_STRIP_EXTENSION, + true, TextWriter.PARAM_OVERWRITE, true); + JCas jcas = JCasFactory.createJCas(); - + DocumentMetaData dmd = DocumentMetaData.create(jcas); dmd.setCollectionId("dummy"); dmd.setDocumentId("text1.txt"); - + runPipeline(jcas, writer); - + assertTrue(new File(outputPath, "text1.txt").exists()); } @Test - public void testStdOut() - throws Exception + public void testStdOut() throws Exception { final String text = "This is a test"; @@ -110,25 +106,26 @@ public void testStdOut() } @Test - public void testCompressed() - throws Exception + public void testCompressed() throws Exception { - String text = StringUtils.repeat("This is a test. ", 100000); - - JCas jcas = JCasFactory.createJCas(); + var text = StringUtils.repeat("This is a test. ", 100000); + + var jcas = JCasFactory.createJCas(); jcas.setDocumentText(text); - - DocumentMetaData meta = DocumentMetaData.create(jcas); + + var meta = DocumentMetaData.create(jcas); meta.setDocumentId("dummy"); - AnalysisEngineDescription writer = createEngineDescription(TextWriter.class, - TextWriter.PARAM_COMPRESSION, CompressionMethod.GZIP, + var writer = createEngineDescription( // + TextWriter.class, // + TextWriter.PARAM_COMPRESSION, CompressionMethod.GZIP, // TextWriter.PARAM_TARGET_LOCATION, outputPath); runPipeline(jcas, writer); - - File input = new File(outputPath, "dummy.txt.gz"); - InputStream is = CompressionUtils.getInputStream(input.getPath(), - new FileInputStream(input)); - assertEquals(text, IOUtils.toString(is)); + + var input = new File(outputPath, "dummy.txt.gz"); + try (var is = CompressionUtils.getInputStream(input.getPath(), + new FileInputStream(input))) { + assertEquals(text, IOUtils.toString(is)); + } } } diff --git a/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TokenizedTextWriterTest.java b/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TokenizedTextWriterTest.java index 77037428b..908773a50 100644 --- a/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TokenizedTextWriterTest.java +++ b/dkpro-core-io-text-asl/src/test/java/org/dkpro/core/io/text/TokenizedTextWriterTest.java @@ -46,33 +46,33 @@ public class TokenizedTextWriterTest private @TempDir File tempDir; @Test - public void testDefault() - throws UIMAException, IOException + public void testDefault() throws UIMAException, IOException { String text = "This is the 1st sentence .\nHere is another sentence ."; File targetFile = new File(tempDir, "TokenizedTextWriterTest.out"); File tokenized = new File("src/test/resources/tokenizedTexts/textTokenized.txt"); - AnalysisEngineDescription writer = createEngineDescription(TokenizedTextWriter.class, - TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, - TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, + AnalysisEngineDescription writer = createEngineDescription( // + TokenizedTextWriter.class, // + TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, // + TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, // TokenizedTextWriter.PARAM_OVERWRITE, true); TestRunner.runTest("id", writer, "en", text); assertTrue(FileUtils.contentEqualsIgnoreEOL(tokenized, targetFile, "UTF-8")); } @Test - public void testMultipleFiles() - throws UIMAException, IOException + public void testMultipleFiles() throws UIMAException, IOException { String text = "This is the 1st sentence .\nHere is another sentence ."; File targetDir = tempDir; File targetFile = new File(targetDir, "id.txt"); File tokenized = new File("src/test/resources/tokenizedTexts/textTokenized.txt"); - AnalysisEngineDescription writer = createEngineDescription(TokenizedTextWriter.class, - TokenizedTextWriter.PARAM_TARGET_LOCATION, targetDir, - TokenizedTextWriter.PARAM_SINGULAR_TARGET, false, + AnalysisEngineDescription writer = createEngineDescription( // + TokenizedTextWriter.class, // + TokenizedTextWriter.PARAM_TARGET_LOCATION, targetDir, // + TokenizedTextWriter.PARAM_SINGULAR_TARGET, false, // TokenizedTextWriter.PARAM_OVERWRITE, true); TestRunner.runTest("id", writer, "en", text); assertTrue(targetDir.isDirectory()); @@ -81,11 +81,9 @@ public void testMultipleFiles() } @Test - public void testTokens() - throws UIMAException, IOException + public void testTokens() throws UIMAException, IOException { - File targetFile = new File(tempDir, - "TokenizedTextWriterTokensTest.out"); + File targetFile = new File(tempDir, "TokenizedTextWriterTokensTest.out"); String text = "This is the 1st sentence .\nHere is another sentence ."; String typeName = Token.class.getTypeName(); File tokenized = new File("src/test/resources/tokenizedTexts/textTokenized.txt"); @@ -99,8 +97,7 @@ public void testTokens() } @Test - public void testLemmas() - throws IOException, UIMAException + public void testLemmas() throws IOException, UIMAException { File targetFile = new File(tempDir, "lemmas.out"); targetFile.deleteOnExit(); @@ -132,10 +129,11 @@ public void testLemmas() Sentence sentence = new Sentence(jCas, 0, 13); sentence.addToIndexes(jCas); - AnalysisEngineDescription writer = createEngineDescription(TokenizedTextWriter.class, - TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, - TokenizedTextWriter.PARAM_FEATURE_PATH, featurePath, - TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, + AnalysisEngineDescription writer = createEngineDescription( // + TokenizedTextWriter.class, // + TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, // + TokenizedTextWriter.PARAM_FEATURE_PATH, featurePath, // + TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, // TokenizedTextWriter.PARAM_OVERWRITE, true); SimplePipeline.runPipeline(jCas, writer); @@ -146,27 +144,26 @@ public void testLemmas() } @Test - public void testStopwords() - throws UIMAException, IOException + public void testStopwords() throws UIMAException, IOException { - File targetFile = new File(tempDir,"TokenizedTextWriterNoStopwords.out"); + var targetFile = new File(tempDir, "TokenizedTextWriterNoStopwords.out"); targetFile.deleteOnExit(); - File tokenized = new File("src/test/resources/tokenizedTexts/textTokenizedNoStopwords.txt"); - String text = "This is the 1st sentence .\nHere is another sentence ."; - String stopwordsFile = "src/test/resources/stopwords_en.txt"; - - AnalysisEngineDescription writer = createEngineDescription(TokenizedTextWriter.class, - TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, - TokenizedTextWriter.PARAM_STOPWORDS_FILE, stopwordsFile, - TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, + var tokenized = new File("src/test/resources/tokenizedTexts/textTokenizedNoStopwords.txt"); + var text = "This is the 1st sentence .\nHere is another sentence ."; + var stopwordsFile = "src/test/resources/stopwords_en.txt"; + + var writer = createEngineDescription( // + TokenizedTextWriter.class, // + TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, // + TokenizedTextWriter.PARAM_STOPWORDS_FILE, stopwordsFile, // + TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, // TokenizedTextWriter.PARAM_OVERWRITE, true); TestRunner.runTest("id", writer, "en", text); assertTrue(FileUtils.contentEqualsIgnoreEOL(tokenized, targetFile, "UTF-8")); } @Test - public void testNumbers() - throws UIMAException, IOException + public void testNumbers() throws UIMAException, IOException { File targetFile = new File(tempDir, "TokenizedTextWriterNoStopwords.out"); targetFile.deleteOnExit(); @@ -174,28 +171,28 @@ public void testNumbers() String text = "This is 1 sentence .\nHere is 2 sentences , or even 2.5 ."; String numbersRegex = "^[0-9]+(\\.[0-9]*)?$"; - AnalysisEngineDescription writer = createEngineDescription(TokenizedTextWriter.class, - TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, - TokenizedTextWriter.PARAM_NUMBER_REGEX, numbersRegex, - TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, + AnalysisEngineDescription writer = createEngineDescription( // + TokenizedTextWriter.class, // + TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, // + TokenizedTextWriter.PARAM_NUMBER_REGEX, numbersRegex, // + TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, // TokenizedTextWriter.PARAM_OVERWRITE, true); TestRunner.runTest("id", writer, "en", text); assertTrue(FileUtils.contentEqualsIgnoreEOL(tokenized, targetFile, "UTF-8")); } @Test - public void testNoSentences() - throws IOException, UIMAException + public void testNoSentences() throws IOException, UIMAException { - File targetFile = new File(tempDir, - "TokenizedTextWriterNoSentences.out"); + File targetFile = new File(tempDir, "TokenizedTextWriterNoSentences.out"); File tokenized = new File("src/test/resources/tokenizedTexts/textNoSentences.txt"); String text = "This is the 1st sentence . Here is another sentence ."; - AnalysisEngineDescription writer = createEngineDescription(TokenizedTextWriter.class, - TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, - TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, - TokenizedTextWriter.PARAM_OVERWRITE, true, + AnalysisEngineDescription writer = createEngineDescription( // + TokenizedTextWriter.class, // + TokenizedTextWriter.PARAM_TARGET_LOCATION, targetFile, // + TokenizedTextWriter.PARAM_SINGULAR_TARGET, true, // + TokenizedTextWriter.PARAM_OVERWRITE, true, // TokenizedTextWriter.PARAM_COVERING_TYPE, null); TestRunner.runTest("id", writer, "en", text); assertTrue(FileUtils.contentEqualsIgnoreEOL(tokenized, targetFile, "UTF-8")); diff --git a/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/TestRunner.java b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/TestRunner.java index 2ff7e61f8..06b90ce6a 100644 --- a/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/TestRunner.java +++ b/dkpro-core-testing-asl/src/main/java/org/dkpro/core/testing/TestRunner.java @@ -23,6 +23,7 @@ import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.fit.testing.factory.TokenBuilder; +import org.apache.uima.fit.util.LifeCycleUtil; import org.apache.uima.jcas.JCas; import org.dkpro.core.api.resources.ResourceObjectProviderBase; @@ -77,7 +78,16 @@ public static JCas runTest(String aDocumentId, AnalysisEngineDescription aEngine String aLanguage, String aDocument) throws UIMAException { - return runTest(aDocumentId, createEngine(aEngine), aLanguage, aDocument); + var engine = createEngine(aEngine); + try { + return runTest(aDocumentId, engine, aLanguage, aDocument); + } + finally { + if (engine != null) { + LifeCycleUtil.collectionProcessComplete(engine); + LifeCycleUtil.destroy(engine); + } + } } /** @@ -130,7 +140,7 @@ public static JCas runTest(String aDocumentId, AnalysisEngine aEngine, String aL System.setProperty(ResourceObjectProviderBase.PROP_REPO_OFFLINE, "true"); } offline = true; - + JCas jcas = aEngine.newJCas(); if (aDocumentId != null) { @@ -145,30 +155,30 @@ public static JCas runTest(String aDocumentId, AnalysisEngine aEngine, String aL tb.buildTokens(jcas, aDocument); aEngine.process(jcas); - -// DkproTestContext context = DkproTestContext.get(); -// if (context != null) { -// File folder = new File("target/test-output/" + context.getTestOutputFolderName()); -// if (!folder.exists()) { -// FileUtils.deleteQuietly(folder); -// } -// folder.mkdirs(); -// -// try (OutputStream docOS = new FileOutputStream(new File(folder, "output.xmi"))) { -// XmiCasSerializer.serialize(jcas.getCas(), null, docOS, true, null); -// } -// catch (Exception e) { -// throw new AnalysisEngineProcessException(e); -// } -// } - + + // DkproTestContext context = DkproTestContext.get(); + // if (context != null) { + // File folder = new File("target/test-output/" + context.getTestOutputFolderName()); + // if (!folder.exists()) { + // FileUtils.deleteQuietly(folder); + // } + // folder.mkdirs(); + // + // try (OutputStream docOS = new FileOutputStream(new File(folder, "output.xmi"))) { + // XmiCasSerializer.serialize(jcas.getCas(), null, docOS, true, null); + // } + // catch (Exception e) { + // throw new AnalysisEngineProcessException(e); + // } + // } + AssertAnnotations.assertValid(jcas); - + return jcas; } - + private static boolean offline = true; - + public static void autoloadModelsOnNextTestRun() { offline = false;