org.dkpro.core
dkpro-core-norvig-asl
diff --git a/dkpro-core-nlp4j-asl/LICENSE.txt b/dkpro-core-nlp4j-asl/LICENSE.txt
deleted file mode 100644
index d645695673..0000000000
--- a/dkpro-core-nlp4j-asl/LICENSE.txt
+++ /dev/null
@@ -1,202 +0,0 @@
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/dkpro-core-nlp4j-asl/pom.xml b/dkpro-core-nlp4j-asl/pom.xml
deleted file mode 100644
index 9efd176531..0000000000
--- a/dkpro-core-nlp4j-asl/pom.xml
+++ /dev/null
@@ -1,165 +0,0 @@
-
-
- 4.0.0
-
-
- dkpro-core-asl
- org.dkpro.core
- 3.0.0-SNAPSHOT
- ../dkpro-core-asl
-
-
- dkpro-core-nlp4j-asl
- jar
- DKPro Core ASL - NLP4J
- https://dkpro.github.io/dkpro-core/
-
-
- 1.1.3
-
-
-
-
- org.apache.uima
- uimaj-core
-
-
- org.apache.uima
- uimafit-core
-
-
- org.apache.commons
- commons-lang3
-
-
- edu.emory.mathcs.nlp
- nlp4j-api
- ${nlp4j.version}
-
-
-
- org.dkpro.core
- dkpro-core-api-metadata-asl
- ${project.version}
-
-
- org.dkpro.core
- dkpro-core-api-resources-asl
- ${project.version}
-
-
- org.dkpro.core
- dkpro-core-api-segmentation-asl
- ${project.version}
-
-
- org.dkpro.core
- dkpro-core-api-lexmorph-asl
- ${project.version}
-
-
- org.dkpro.core
- dkpro-core-api-syntax-asl
- ${project.version}
-
-
- org.dkpro.core
- dkpro-core-api-ner-asl
- ${project.version}
-
-
- org.dkpro.core
- dkpro-core-api-io-asl
- ${project.version}
-
-
- org.dkpro.core
- dkpro-core-api-parameter-asl
- ${project.version}
-
-
- eu.openminted.share.annotations
- omtd-share-annotations-api
-
-
- org.dkpro.core
- dkpro-core-testing-asl
- ${project.version}
- test
-
-
- de.tudarmstadt.ukp.dkpro.core
- de.tudarmstadt.ukp.dkpro.core.nlp4j-model-tagger-en-default
- test
-
-
- de.tudarmstadt.ukp.dkpro.core
- de.tudarmstadt.ukp.dkpro.core.nlp4j-model-ner-en-default
- test
-
-
- de.tudarmstadt.ukp.dkpro.core
- de.tudarmstadt.ukp.dkpro.core.nlp4j-model-parser-en-default
- test
-
-
-
-
-
- de.tudarmstadt.ukp.dkpro.core
- de.tudarmstadt.ukp.dkpro.core.nlp4j-model-tagger-en-default
- 20160802.0
-
-
- de.tudarmstadt.ukp.dkpro.core
- de.tudarmstadt.ukp.dkpro.core.nlp4j-model-ner-en-default
- 20160802.0
-
-
- de.tudarmstadt.ukp.dkpro.core
- de.tudarmstadt.ukp.dkpro.core.nlp4j-model-parser-en-default
- 20160802.0
-
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-dependency-plugin
-
-
-
- de.tudarmstadt.ukp.dkpro.core:de.tudarmstadt.ukp.dkpro.core.nlp4j-model-tagger-en-default
- de.tudarmstadt.ukp.dkpro.core:de.tudarmstadt.ukp.dkpro.core.nlp4j-model-ner-en-default
- de.tudarmstadt.ukp.dkpro.core:de.tudarmstadt.ukp.dkpro.core.nlp4j-model-parser-en-default
-
-
-
-
-
-
-
diff --git a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JDependencyParser.java b/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JDependencyParser.java
deleted file mode 100644
index 5d907558d3..0000000000
--- a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JDependencyParser.java
+++ /dev/null
@@ -1,253 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.nlp4j;
-
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.apache.uima.fit.util.JCasUtil.selectCovered;
-import static org.apache.uima.util.Level.INFO;
-import static org.dkpro.core.api.resources.MappingProviderFactory.createDependencyMappingProvider;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.Set;
-import java.util.TreeSet;
-
-import org.apache.commons.lang3.StringUtils;
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.descriptor.TypeCapability;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.dkpro.core.api.parameter.ComponentParameters;
-import org.dkpro.core.api.resources.MappingProvider;
-import org.dkpro.core.api.resources.ModelProviderBase;
-import org.dkpro.core.nlp4j.internal.EmoryNlp2Uima;
-import org.dkpro.core.nlp4j.internal.EmoryNlpUtils;
-import org.dkpro.core.nlp4j.internal.OnlineComponentTagsetDescriptionProvider;
-import org.dkpro.core.nlp4j.internal.Uima2EmoryNlp;
-
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
-import edu.emory.mathcs.nlp.common.util.NLPUtils;
-import edu.emory.mathcs.nlp.component.dep.DEPState;
-import edu.emory.mathcs.nlp.component.template.OnlineComponent;
-import edu.emory.mathcs.nlp.component.template.node.NLPNode;
-import eu.openminted.share.annotations.api.Component;
-import eu.openminted.share.annotations.api.DocumentationResource;
-import eu.openminted.share.annotations.api.constants.OperationType;
-
-/**
- * Emory NLP4J dependency parser. Parses the tokens of each sentence with the loaded NLP4J model (English only).
- */
-@Component(OperationType.DEPENDENCY_PARSER)
-@ResourceMetaData(name = "NLP4J Dependency Parser")
-@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
-@TypeCapability(
- inputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
- "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS"},
- outputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency"})
-public class Nlp4JDependencyParser
- extends JCasAnnotator_ImplBase
-{
- /**
- * Log the tag set(s) when a model is loaded.
- */
- public static final String PARAM_PRINT_TAGSET = ComponentParameters.PARAM_PRINT_TAGSET;
- @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = "false")
- private boolean printTagSet;
-
- /**
- * Use this language instead of the document language to resolve the model and tag set mapping.
- */
- public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
- @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
- private String language;
-
- /**
- * Variant of the model. Used to address a specific model if there are multiple models
- * for one language.
- */
- public static final String PARAM_VARIANT = ComponentParameters.PARAM_VARIANT;
- @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false)
- private String variant;
-
- /**
- * URI of the model artifact. This can be used to override the default model resolving
- * mechanism and directly address a particular model.
- *
- * The URI format is {@code mvn:${groupId}:${artifactId}:${version}}. Remember to set
- * the variant parameter to match the artifact. If the artifact contains the model in
- * a non-default location, you also have to specify the model location parameter, e.g.
- * {@code classpath:/model/path/in/artifact/model.bin}.
- */
- public static final String PARAM_MODEL_ARTIFACT_URI =
- ComponentParameters.PARAM_MODEL_ARTIFACT_URI;
- @ConfigurationParameter(name = PARAM_MODEL_ARTIFACT_URI, mandatory = false)
- protected String modelArtifactUri;
-
- /**
- * Location from which the model is read.
- */
- public static final String PARAM_MODEL_LOCATION = ComponentParameters.PARAM_MODEL_LOCATION;
- @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false)
- private String modelLocation;
-
- /**
- * Enable/disable type mapping.
- */
- public static final String PARAM_MAPPING_ENABLED = ComponentParameters.PARAM_MAPPING_ENABLED;
- @ConfigurationParameter(name = PARAM_MAPPING_ENABLED, mandatory = true, defaultValue =
- ComponentParameters.DEFAULT_MAPPING_ENABLED)
- protected boolean mappingEnabled;
-
- /**
- * Location of the mapping file for dependency tags to UIMA types.
- */
- public static final String PARAM_DEPENDENCY_MAPPING_LOCATION =
- ComponentParameters.PARAM_DEPENDENCY_MAPPING_LOCATION;
- @ConfigurationParameter(name = PARAM_DEPENDENCY_MAPPING_LOCATION, mandatory = false)
- private String dependencyMappingLocation;
-
- /**
- * Process anyway, even if the model relies on features that are not supported by this
- * component.
- */
- public static final String PARAM_IGNORE_MISSING_FEATURES = "ignoreMissingFeatures";
- @ConfigurationParameter(name = PARAM_IGNORE_MISSING_FEATURES, mandatory = true, defaultValue = "false")
- protected boolean ignoreMissingFeatures;
-
- private Nlp4JDependencyParserModelProvider modelProvider;
- private MappingProvider mappingProvider;
-
- @Override
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException
- {
- super.initialize(aContext);
-
- modelProvider = new Nlp4JDependencyParserModelProvider(this);
-
- mappingProvider = createDependencyMappingProvider(this, dependencyMappingLocation, language,
- modelProvider);
- }
-
- @Override
- public void process(JCas aJCas)
- throws AnalysisEngineProcessException
- {
- CAS cas = aJCas.getCas();
-
- modelProvider.configure(cas);
- mappingProvider.configure(cas);
-
- for (Sentence sentence : select(aJCas, Sentence.class)) {
- List tokens = selectCovered(aJCas, Token.class, sentence);
- NLPNode[] nodes = Uima2EmoryNlp.convertSentence(tokens);
-
- // Process the current sentence - new results will be stored in the existing NLPNodes
- modelProvider.getResource().process(nodes);
-
- EmoryNlp2Uima.convertDependencies(aJCas, tokens, nodes, mappingProvider);
- }
- }
-
- private class Nlp4JDependencyParserModelProvider
- extends ModelProviderBase>>
- {
- public Nlp4JDependencyParserModelProvider(Object aObject)
- {
- super(aObject, "nlp4j", "parser");
-
- setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
- setDefault(LOCATION,
- "classpath:/de/tudarmstadt/ukp/dkpro/core/nlp4j/lib/parser-${language}-${variant}.properties");
- }
-
- @Override
- protected OnlineComponent> produceResource(InputStream aStream)
- throws Exception
- {
- String language = getAggregatedProperties().getProperty(LANGUAGE);
-
- if (!language.equals("en")) {
- throw new IllegalArgumentException(new Throwable(
- "Emory NLP4J supports only English"));
- }
-
- EmoryNlpUtils.initGlobalLexica();
-
- // Load the dependency parser model from the stream the model provider offers
- OnlineComponent> component = (OnlineComponent)
- NLPUtils.getComponent(aStream);
-
- // Extract tagset information from the model
- OnlineComponentTagsetDescriptionProvider> tsdp =
- new OnlineComponentTagsetDescriptionProvider>(
- getResourceMetaData().getProperty("dependency.tagset"), Dependency.class,
- component)
- {
- @Override
- public Set listTags(String aLayer, String aTagsetName)
- {
- Set cleanTags = new TreeSet();
-
- for (String tag : super.listTags(aLayer, aTagsetName)) {
- String t = StringUtils.substringAfterLast(tag, "_");
- if (t.length() > 0) {
- cleanTags.add(t);
- }
- }
-
- return cleanTags;
- }
- };
- addTagset(tsdp);
-
- if (printTagSet) {
- getContext().getLogger().log(INFO, tsdp.toString());
- }
-
- Set features = EmoryNlpUtils.extractFeatures(component);
- getLogger().info("Model uses these features: " + features);
-
-
- Set unsupportedFeatures = EmoryNlpUtils.extractUnsupportedFeatures(component,
- "dependency_label", "valency");
- if (!unsupportedFeatures.isEmpty()) {
- String message = "Model these uses unsupported features: " + unsupportedFeatures;
- if (ignoreMissingFeatures) {
- getLogger().warn(message);
- }
- else {
- throw new IOException(message);
- }
- }
-
- return component;
- }
- };
-}
diff --git a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JLemmatizer.java b/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JLemmatizer.java
deleted file mode 100644
index 0aabc1e872..0000000000
--- a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JLemmatizer.java
+++ /dev/null
@@ -1,121 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.nlp4j;
-
-import static org.apache.uima.fit.util.JCasUtil.select;
-
-import java.io.IOException;
-import java.net.URL;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.descriptor.TypeCapability;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.dkpro.core.api.parameter.ComponentParameters;
-import org.dkpro.core.api.resources.ModelProviderBase;
-
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import edu.emory.mathcs.nlp.common.util.StringUtils;
-import edu.emory.mathcs.nlp.component.morph.MorphAnalyzer;
-import edu.emory.mathcs.nlp.component.morph.english.EnglishMorphAnalyzer;
-import eu.openminted.share.annotations.api.Component;
-import eu.openminted.share.annotations.api.DocumentationResource;
-import eu.openminted.share.annotations.api.constants.OperationType;
-
-/**
- * Emory NLP4J lemmatizer. This is a lower-casing lemmatizer. Only English ("en") is supported.
- */
-@Component(OperationType.LEMMATIZER)
-@ResourceMetaData(name = "NLP4J Lemmatizer")
-@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
-@TypeCapability(
- inputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
- "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS"},
- outputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma" })
-public class Nlp4JLemmatizer
- extends JCasAnnotator_ImplBase
-{
- /**
- * Use this language instead of the document language to resolve the model.
- */
- public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
- @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
- protected String language;
-
- private ModelProviderBase modelProvider;
-
- @Override
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException
- {
- super.initialize(aContext);
-
- modelProvider = new ModelProviderBase() {
- {
- setContextObject(Nlp4JLemmatizer.this);
- setDefault(LOCATION, NOT_REQUIRED + "-${language}");
- setOverride(LANGUAGE, language);
- }
-
- @Override
- protected MorphAnalyzer produceResource(URL aUrl)
- throws IOException
- {
- String language = getAggregatedProperties().getProperty(LANGUAGE);
-
- if (!language.equals("en")) {
- throw new IllegalArgumentException(new Throwable(
- "Emory NLP4J supports only English"));
- }
-
- return new EnglishMorphAnalyzer();
- }
- };
- }
-
- @Override
- public void process(JCas aJCas)
- throws AnalysisEngineProcessException
- {
- modelProvider.configure(aJCas.getCas());
-
- MorphAnalyzer lemmatizer = modelProvider.getResource();
-
- for (Token t : select(aJCas, Token.class)) {
- String pos = null;
- if (t.getPos() != null) {
- pos = t.getPos().getPosValue();
- }
-
- Lemma lemma = new Lemma(aJCas, t.getBegin(), t.getEnd());
- lemma.setValue(lemmatizer.lemmatize(StringUtils.toSimplifiedForm(t.getText()),
- pos));
- lemma.addToIndexes();
-
- t.setLemma(lemma);
- }
- }
-}
diff --git a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JNamedEntityRecognizer.java b/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JNamedEntityRecognizer.java
deleted file mode 100644
index 3da583e65e..0000000000
--- a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JNamedEntityRecognizer.java
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.nlp4j;
-
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.apache.uima.fit.util.JCasUtil.selectCovered;
-import static org.apache.uima.util.Level.INFO;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.descriptor.TypeCapability;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.dkpro.core.api.parameter.ComponentParameters;
-import org.dkpro.core.api.resources.MappingProvider;
-import org.dkpro.core.api.resources.MappingProviderFactory;
-import org.dkpro.core.api.resources.ModelProviderBase;
-import org.dkpro.core.nlp4j.internal.EmoryNlp2Uima;
-import org.dkpro.core.nlp4j.internal.EmoryNlpUtils;
-import org.dkpro.core.nlp4j.internal.OnlineComponentTagsetDescriptionProvider;
-import org.dkpro.core.nlp4j.internal.Uima2EmoryNlp;
-
-import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import edu.emory.mathcs.nlp.common.util.NLPUtils;
-import edu.emory.mathcs.nlp.component.ner.NERState;
-import edu.emory.mathcs.nlp.component.template.OnlineComponent;
-import edu.emory.mathcs.nlp.component.template.node.NLPNode;
-import eu.openminted.share.annotations.api.Component;
-import eu.openminted.share.annotations.api.DocumentationResource;
-import eu.openminted.share.annotations.api.constants.OperationType;
-
-/**
- * Emory NLP4J name finder wrapper.
- */
-@Component(OperationType.NAMED_ENTITITY_RECOGNIZER)
-@ResourceMetaData(name = "NLP4J Named Entity Recognizer")
-@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
-@TypeCapability(
- inputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
- "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma" },
- outputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity" })
-public class Nlp4JNamedEntityRecognizer
- extends JCasAnnotator_ImplBase
-{
- /**
- * Log the tag set(s) when a model is loaded.
- */
- public static final String PARAM_PRINT_TAGSET = ComponentParameters.PARAM_PRINT_TAGSET;
- @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = "false")
- protected boolean printTagSet;
-
- /**
- * Use this language instead of the document language to resolve the model.
- */
- public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
- @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
- protected String language;
-
- /**
- * Variant of a model the model. Used to address a specific model if here are multiple models
- * for one language.
- */
- public static final String PARAM_VARIANT = ComponentParameters.PARAM_VARIANT;
- @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false)
- protected String variant;
-
- /**
- * URI of the model artifact. This can be used to override the default model resolving
- * mechanism and directly address a particular model.
- *
- * The URI format is {@code mvn:${groupId}:${artifactId}:${version}}. Remember to set
- * the variant parameter to match the artifact. If the artifact contains the model in
- * a non-default location, you also have to specify the model location parameter, e.g.
- * {@code classpath:/model/path/in/artifact/model.bin}.
- */
- public static final String PARAM_MODEL_ARTIFACT_URI =
- ComponentParameters.PARAM_MODEL_ARTIFACT_URI;
- @ConfigurationParameter(name = PARAM_MODEL_ARTIFACT_URI, mandatory = false)
- protected String modelArtifactUri;
-
- /**
- * Location from which the model is read.
- */
- public static final String PARAM_MODEL_LOCATION = ComponentParameters.PARAM_MODEL_LOCATION;
- @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false)
- protected String modelLocation;
-
- /**
- * Location of the mapping file for named entity tags to UIMA types.
- */
- public static final String PARAM_NAMED_ENTITY_MAPPING_LOCATION =
- ComponentParameters.PARAM_NAMED_ENTITY_MAPPING_LOCATION;
- @ConfigurationParameter(name = PARAM_NAMED_ENTITY_MAPPING_LOCATION, mandatory = false)
- protected String mappingLocation;
-
- /**
- * Process anyway, even if the model relies on features that are not supported by this
- * component.
- */
- public static final String PARAM_IGNORE_MISSING_FEATURES = "ignoreMissingFeatures";
- @ConfigurationParameter(name = PARAM_IGNORE_MISSING_FEATURES, mandatory = true, defaultValue = "false")
- protected boolean ignoreMissingFeatures;
-
- private Nlp4JNamedEntityRecognizerModelProvider modelProvider;
- private MappingProvider mappingProvider;
-
- @Override
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException
- {
- super.initialize(aContext);
-
- modelProvider = new Nlp4JNamedEntityRecognizerModelProvider(this);
-
- mappingProvider = MappingProviderFactory.createNerMappingProvider(this, mappingLocation,
- language, variant, modelProvider);
- }
-
- @Override
- public void process(JCas aJCas)
- throws AnalysisEngineProcessException
- {
- CAS cas = aJCas.getCas();
- modelProvider.configure(cas);
- mappingProvider.configure(cas);
-
- for (Sentence sentence : select(aJCas, Sentence.class)) {
- List tokens = selectCovered(aJCas, Token.class, sentence);
- NLPNode[] nodes = Uima2EmoryNlp.convertSentence(tokens);
-
- // Process the sentences - new results will be stored in the existing NLPNodes
- modelProvider.getResource().process(nodes);
-
- EmoryNlp2Uima.convertNamedEntities(cas, tokens, nodes, mappingProvider);
- }
- }
-
- private class Nlp4JNamedEntityRecognizerModelProvider
- extends ModelProviderBase>>
- {
- public Nlp4JNamedEntityRecognizerModelProvider(Object aOwner)
- {
- super(aOwner, "nlp4j", "ner");
- setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
- setDefault(LOCATION,
- "classpath:/de/tudarmstadt/ukp/dkpro/core/nlp4j/lib/ner-${language}-${variant}.properties");
- }
-
- @Override
- protected OnlineComponent> produceResource(InputStream aStream)
- throws Exception
- {
- String language = getAggregatedProperties().getProperty(LANGUAGE);
-
- if (!language.equals("en")) {
- throw new IllegalArgumentException(new Throwable(
- "Emory NLP4J supports only English"));
- }
-
- EmoryNlpUtils.initGlobalLexica();
-
- // Load the POS tagger model from the location the model provider offers
- OnlineComponent> component = (OnlineComponent) NLPUtils
- .getComponent(aStream);
-
- // Extract tagset information from the model
- OnlineComponentTagsetDescriptionProvider> tsdp =
- new OnlineComponentTagsetDescriptionProvider>(
- getResourceMetaData().getProperty("ner.tagset"), POS.class, component);
- // addTagset(tsdp);
-
- if (printTagSet) {
- getContext().getLogger().log(INFO, tsdp.toString());
- }
-
- Set features = EmoryNlpUtils.extractFeatures(component);
- getLogger().info("Model uses these features: " + features);
-
- Set unsupportedFeatures = EmoryNlpUtils.extractUnsupportedFeatures(component,
- "named_entity_tag");
- if (!unsupportedFeatures.isEmpty()) {
- String message = "Model these uses unsupported features: " + unsupportedFeatures;
- if (ignoreMissingFeatures) {
- getLogger().warn(message);
- }
- else {
- throw new IOException(message);
- }
- }
-
- return component;
- }
- };
-}
diff --git a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JPosTagger.java b/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JPosTagger.java
deleted file mode 100644
index ad66128277..0000000000
--- a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JPosTagger.java
+++ /dev/null
@@ -1,237 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.nlp4j;
-
-import static org.apache.uima.fit.util.JCasUtil.select;
-import static org.apache.uima.fit.util.JCasUtil.selectCovered;
-import static org.apache.uima.util.Level.INFO;
-import static org.dkpro.core.api.resources.MappingProviderFactory.createPosMappingProvider;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.List;
-import java.util.Set;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.cas.CAS;
-import org.apache.uima.fit.component.JCasAnnotator_ImplBase;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.descriptor.TypeCapability;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.dkpro.core.api.parameter.ComponentParameters;
-import org.dkpro.core.api.resources.MappingProvider;
-import org.dkpro.core.api.resources.ModelProviderBase;
-import org.dkpro.core.nlp4j.internal.EmoryNlp2Uima;
-import org.dkpro.core.nlp4j.internal.EmoryNlpUtils;
-import org.dkpro.core.nlp4j.internal.OnlineComponentTagsetDescriptionProvider;
-import org.dkpro.core.nlp4j.internal.Uima2EmoryNlp;
-
-import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import edu.emory.mathcs.nlp.common.util.NLPUtils;
-import edu.emory.mathcs.nlp.component.pos.POSState;
-import edu.emory.mathcs.nlp.component.template.OnlineComponent;
-import edu.emory.mathcs.nlp.component.template.node.NLPNode;
-import eu.openminted.share.annotations.api.Component;
-import eu.openminted.share.annotations.api.DocumentationResource;
-import eu.openminted.share.annotations.api.constants.OperationType;
-
-/**
- * Part-of-Speech annotator using Emory NLP4J. Requires {@link Sentence}s to be annotated before.
- */
-@Component(OperationType.PART_OF_SPEECH_TAGGER)
-@ResourceMetaData(name = "NLP4J POS-Tagger")
-@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
-@TypeCapability(
- inputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" },
- outputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS" })
-public class Nlp4JPosTagger
- extends JCasAnnotator_ImplBase
-{
- /**
- * Use this language instead of the document language to resolve the model.
- */
- public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
- @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
- protected String language;
-
- /**
- * Override the default variant used to locate the model.
- */
- public static final String PARAM_VARIANT = ComponentParameters.PARAM_VARIANT;
- @ConfigurationParameter(name = PARAM_VARIANT, mandatory = false)
- protected String variant;
-
- /**
- * URI of the model artifact. This can be used to override the default model resolving
- * mechanism and directly address a particular model.
- *
- * The URI format is {@code mvn:${groupId}:${artifactId}:${version}}. Remember to set
- * the variant parameter to match the artifact. If the artifact contains the model in
- * a non-default location, you also have to specify the model location parameter, e.g.
- * {@code classpath:/model/path/in/artifact/model.bin}.
- */
- public static final String PARAM_MODEL_ARTIFACT_URI =
- ComponentParameters.PARAM_MODEL_ARTIFACT_URI;
- @ConfigurationParameter(name = PARAM_MODEL_ARTIFACT_URI, mandatory = false)
- protected String modelArtifactUri;
-
- /**
- * Load the model from this location instead of locating the model automatically.
- */
- public static final String PARAM_MODEL_LOCATION = ComponentParameters.PARAM_MODEL_LOCATION;
- @ConfigurationParameter(name = PARAM_MODEL_LOCATION, mandatory = false)
- protected String modelLocation;
-
- /**
- * Enable/disable type mapping.
- */
- public static final String PARAM_MAPPING_ENABLED = ComponentParameters.PARAM_MAPPING_ENABLED;
- @ConfigurationParameter(name = PARAM_MAPPING_ENABLED, mandatory = true, defaultValue =
- ComponentParameters.DEFAULT_MAPPING_ENABLED)
- protected boolean mappingEnabled;
-
- /**
- * Load the part-of-speech tag to UIMA type mapping from this location instead of locating
- * the mapping automatically.
- */
- public static final String PARAM_POS_MAPPING_LOCATION =
- ComponentParameters.PARAM_POS_MAPPING_LOCATION;
- @ConfigurationParameter(name = PARAM_POS_MAPPING_LOCATION, mandatory = false)
- protected String posMappingLocation;
-
- /**
- * Log the tag set(s) when a model is loaded.
- */
- public static final String PARAM_PRINT_TAGSET = ComponentParameters.PARAM_PRINT_TAGSET;
- @ConfigurationParameter(name = PARAM_PRINT_TAGSET, mandatory = true, defaultValue = "false")
- protected boolean printTagSet;
-
- /**
- * Process anyway, even if the model relies on features that are not supported by this
- * component.
- */
- public static final String PARAM_IGNORE_MISSING_FEATURES = "ignoreMissingFeatures";
- @ConfigurationParameter(name = PARAM_IGNORE_MISSING_FEATURES, mandatory = true, defaultValue = "false")
- protected boolean ignoreMissingFeatures;
-
- private Nlp4JPosTaggerModelProvider modelProvider;
- private MappingProvider mappingProvider;
-
- @Override
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException
- {
- super.initialize(aContext);
-
- modelProvider = new Nlp4JPosTaggerModelProvider(this);
-
- // General setup of the mapping provider in initialize()
- mappingProvider = createPosMappingProvider(this, posMappingLocation, language,
- modelProvider);
- }
-
- @Override
- public void process(JCas aJCas)
- throws AnalysisEngineProcessException
- {
- CAS cas = aJCas.getCas();
-
- // Document-specific configuration of model and mapping provider in process()
- modelProvider.configure(cas);
-
- // Mind the mapping provider must be configured after the model provider as it uses the
- // model metadata
- mappingProvider.configure(cas);
-
- for (Sentence sentence : select(aJCas, Sentence.class)) {
- List tokens = selectCovered(aJCas, Token.class, sentence);
- NLPNode[] nodes = Uima2EmoryNlp.convertSentence(tokens);
-
- // Process the sentences - new results will be stored in the existing NLPNodes
- modelProvider.getResource().process(nodes);
-
- EmoryNlp2Uima.convertPos(cas, tokens, nodes, mappingProvider);
- }
- }
-
- private class Nlp4JPosTaggerModelProvider
- extends ModelProviderBase>>
- {
- public Nlp4JPosTaggerModelProvider(Object aOwner)
- {
- super(aOwner, "nlp4j", "tagger");
- setDefault(GROUP_ID, "de.tudarmstadt.ukp.dkpro.core");
- setDefault(LOCATION,
- "classpath:/de/tudarmstadt/ukp/dkpro/core/nlp4j/lib/tagger-${language}-${variant}.properties");
- }
-
- @Override
- protected OnlineComponent> produceResource(InputStream aStream)
- throws Exception
- {
- String language = getAggregatedProperties().getProperty(LANGUAGE);
-
- if (!language.equals("en")) {
- throw new IllegalArgumentException(new Throwable(
- "Emory NLP4J supports only English"));
- }
-
- EmoryNlpUtils.initGlobalLexica();
-
- // Load the POS tagger model from the location the model provider offers
- OnlineComponent> component = (OnlineComponent)
- NLPUtils.getComponent(aStream);
-
- // Extract tagset information from the model
- OnlineComponentTagsetDescriptionProvider> tsdp =
- new OnlineComponentTagsetDescriptionProvider<>(
- getResourceMetaData().getProperty("pos.tagset"), POS.class, component);
- addTagset(tsdp);
-
- if (printTagSet) {
- getContext().getLogger().log(INFO, tsdp.toString());
- }
-
- Set features = EmoryNlpUtils.extractFeatures(component);
- getLogger().info("Model uses these features: " + features);
-
-
- Set unsupportedFeatures = EmoryNlpUtils.extractUnsupportedFeatures(component);
- if (!unsupportedFeatures.isEmpty()) {
- String message = "Model these uses unsupported features: " + unsupportedFeatures;
- if (ignoreMissingFeatures) {
- getLogger().warn(message);
- }
- else {
- throw new IOException(message);
- }
- }
-
- // Create a new POS tagger instance from the loaded model
- return component;
- }
- };
-}
diff --git a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JSegmenter.java b/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JSegmenter.java
deleted file mode 100644
index 1c158c10ad..0000000000
--- a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/Nlp4JSegmenter.java
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.nlp4j;
-
-import java.io.IOException;
-import java.net.URL;
-import java.util.List;
-
-import org.apache.uima.UimaContext;
-import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
-import org.apache.uima.fit.descriptor.ConfigurationParameter;
-import org.apache.uima.fit.descriptor.ResourceMetaData;
-import org.apache.uima.fit.descriptor.TypeCapability;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
-import org.dkpro.core.api.parameter.ComponentParameters;
-import org.dkpro.core.api.resources.CasConfigurableProviderBase;
-import org.dkpro.core.api.resources.ModelProviderBase;
-import org.dkpro.core.api.segmentation.SegmenterBase;
-
-import edu.emory.mathcs.nlp.component.tokenizer.EnglishTokenizer;
-import edu.emory.mathcs.nlp.component.tokenizer.Tokenizer;
-import edu.emory.mathcs.nlp.component.tokenizer.token.Token;
-import eu.openminted.share.annotations.api.DocumentationResource;
-
-/**
- * Segmenter using Emory NLP4J.
- */
-@ResourceMetaData(name = "NLP4J Segmenter")
-@DocumentationResource("${docbase}/component-reference.html#engine-${shortClassName}")
-@TypeCapability(
- outputs = {
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token",
- "de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence" })
-public class Nlp4JSegmenter
- extends SegmenterBase
-{
- /**
- * Use this language instead of the document language to resolve the model.
- */
- public static final String PARAM_LANGUAGE = ComponentParameters.PARAM_LANGUAGE;
- @ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
- protected String language;
-
- private CasConfigurableProviderBase modelProvider;
-
- @Override
- public void initialize(UimaContext aContext)
- throws ResourceInitializationException
- {
- super.initialize(aContext);
-
- modelProvider = new ModelProviderBase()
- {
- {
- setContextObject(Nlp4JSegmenter.this);
- setDefault(LOCATION, NOT_REQUIRED + "-${language}");
- setOverride(LANGUAGE, language);
- }
-
- @Override
- protected Tokenizer produceResource(URL aUrl)
- throws IOException
- {
- String language = getAggregatedProperties().getProperty(LANGUAGE);
-
- if (!language.equals("en")) {
- throw new IllegalArgumentException(new Throwable(
- "Emory NLP4J supports only English"));
- }
-
- return new EnglishTokenizer();
- }
- };
- }
-
- @Override
- protected void process(JCas aJCas, String aText, int aZoneBegin)
- throws AnalysisEngineProcessException
- {
- modelProvider.configure(aJCas.getCas());
- Tokenizer segmenter = modelProvider.getResource();
-
- List> sentences = segmenter.segmentize(aText);
-
- for (List sentence : sentences) {
- // Tokens actually start only at index 1 - the 0 index is some odd "@#r$%"
- for (Token token : sentence) {
- createToken(aJCas, aZoneBegin + token.getStartOffset(),
- aZoneBegin + token.getEndOffset());
- }
-
- int sentBegin = aZoneBegin + sentence.get(0).getStartOffset();
- int sentEnd = aZoneBegin + sentence.get(sentence.size() - 1).getEndOffset();
-
- createSentence(aJCas, sentBegin, sentEnd);
- }
- }
-}
diff --git a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/internal/EmoryNlp2Uima.java b/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/internal/EmoryNlp2Uima.java
deleted file mode 100644
index a821577675..0000000000
--- a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/internal/EmoryNlp2Uima.java
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.nlp4j.internal;
-
-import java.util.List;
-
-import org.apache.uima.cas.CAS;
-import org.apache.uima.cas.Feature;
-import org.apache.uima.cas.Type;
-import org.apache.uima.jcas.JCas;
-import org.dkpro.core.api.io.BilouDecoder;
-import org.dkpro.core.api.lexmorph.pos.POSUtils;
-import org.dkpro.core.api.resources.MappingProvider;
-
-import de.tudarmstadt.ukp.dkpro.core.api.lexmorph.type.pos.POS;
-import de.tudarmstadt.ukp.dkpro.core.api.ner.type.NamedEntity;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.DependencyFlavor;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.ROOT;
-import edu.emory.mathcs.nlp.component.template.node.NLPNode;
-
-public class EmoryNlp2Uima
-{
- public static void convertPos(CAS aCas, List aTokens, NLPNode[] aNodes,
- MappingProvider aMappingProvider)
- {
- // EmoryNLP tokens start at 1
- int i = 1;
- for (Token t : aTokens) {
- String tag = aNodes[i].getPartOfSpeechTag();
-
- // Convert the tag produced by the tagger to an UIMA type, create an annotation
- // of this type, and add it to the document.
- Type posTag = aMappingProvider.getTagType(tag);
- POS posAnno = (POS) aCas.createAnnotation(posTag, t.getBegin(), t.getEnd());
- // To save memory, we typically intern() tag strings
- posAnno.setPosValue(tag != null ? tag.intern() : null);
- POSUtils.assignCoarseValue(posAnno);
- posAnno.addToIndexes();
-
- // Connect the POS annotation to the respective token annotation
- t.setPos(posAnno);
- i++;
- }
- }
-
- public static void convertDependencies(JCas aJCas, List aTokens, NLPNode[] aNodes,
- MappingProvider aMappingProvider)
- {
- for (int i = 1; i < aNodes.length; i++) {
- NLPNode depNode = aNodes[i];
- NLPNode govNode = depNode.getDependencyHead();
- String label = depNode.getDependencyLabel();
-
- // FIXME Also extract the semantic heads and store them with dependency flavor
- // ENHANCED
-
- if (govNode.getID() != 0) {
- Type depRel = aMappingProvider.getTagType(label);
- Dependency dep = (Dependency) aJCas.getCas().createFS(depRel);
- dep.setDependencyType(label != null ? label.intern() : null);
- dep.setDependent(aTokens.get(depNode.getID() - 1));
- dep.setGovernor(aTokens.get(govNode.getID() - 1));
- dep.setBegin(dep.getDependent().getBegin());
- dep.setEnd(dep.getDependent().getEnd());
- dep.setFlavor(DependencyFlavor.BASIC);
- dep.addToIndexes();
- }
- else {
- Dependency dep = new ROOT(aJCas);
- dep.setDependencyType(label);
- dep.setDependent(aTokens.get(depNode.getID() - 1));
- dep.setGovernor(aTokens.get(depNode.getID() - 1));
- dep.setBegin(dep.getDependent().getBegin());
- dep.setEnd(dep.getDependent().getEnd());
- dep.setFlavor(DependencyFlavor.BASIC);
- dep.addToIndexes();
- }
- }
- }
-
- public static void convertNamedEntities(CAS aCas, List aTokens, NLPNode[] aNodes,
- MappingProvider aMappingProvider)
- {
- Type neType = aCas.getTypeSystem().getType(NamedEntity.class.getName());
- Feature valueFeat = neType.getFeatureByBaseName("value");
-
- String[] neTags = new String[aNodes.length - 1];
- for (int i = 1; i < aNodes.length; i++) {
- neTags[i - 1] = aNodes[i].getNamedEntityTag();
- }
-
- BilouDecoder decoder = new BilouDecoder(aCas, valueFeat, aMappingProvider);
- decoder.decode(aTokens, neTags);
- }
-}
diff --git a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/internal/EmoryNlpUtils.java b/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/internal/EmoryNlpUtils.java
deleted file mode 100644
index acb0249f2a..0000000000
--- a/dkpro-core-nlp4j-asl/src/main/java/org/dkpro/core/nlp4j/internal/EmoryNlpUtils.java
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- * Copyright 2017
- * Ubiquitous Knowledge Processing (UKP) Lab
- * Technische Universität Darmstadt
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.dkpro.core.nlp4j.internal;
-
-import static java.util.Arrays.asList;
-
-import java.io.IOException;
-import java.io.ObjectInputStream;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.parsers.ParserConfigurationException;
-
-import org.dkpro.core.api.resources.ResourceUtils;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-
-import edu.emory.mathcs.nlp.common.collection.tree.PrefixTree;
-import edu.emory.mathcs.nlp.common.util.IOUtils;
-import edu.emory.mathcs.nlp.component.template.OnlineComponent;
-import edu.emory.mathcs.nlp.component.template.feature.FeatureItem;
-import edu.emory.mathcs.nlp.component.template.feature.Field;
-import edu.emory.mathcs.nlp.component.template.lexicon.GlobalLexica;
-import edu.emory.mathcs.nlp.component.template.lexicon.GlobalLexicon;
-import edu.emory.mathcs.nlp.component.template.node.NLPNode;
-
-public class EmoryNlpUtils
-{
- private static GlobalLexica lexica;
-
- public static synchronized void initGlobalLexica()
- throws IOException, ParserConfigurationException
- {
- if (lexica != null) {
- return;
- }
-
- // Cf. classpath:/edu/emory/mathcs/nlp/configuration/config-decode-en.xml
-
- String LEXICA_PREFIX = "classpath:/edu/emory/mathcs/nlp/lexica/";
-
- DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
- Document xmlDoc = builder.newDocument();
- Element root = xmlDoc.createElement("dummy");
-
- lexica = new GlobalLexica<>(root);
-
- lexica.setAmbiguityClasses(new GlobalLexicon