diff --git a/README.md b/README.md
index b12e02f..6d13134 100644
--- a/README.md
+++ b/README.md
@@ -106,6 +106,10 @@ First of all it is necessary to download the following external files and depend
- [WordNet ESA](https://docs.google.com/uc?export=download&id=1I6oQqIeZva1CwLA96OkHFSZKiBfUgWLe)
- [WordNet LexSemResources](https://docs.google.com/uc?export=download&id=1TeYlsHbcCtxbsVVoBvttdVsvbKFHPbZn)
+Second, it is necessary to download the following file and extract its contents into the gloveModel/ folder (at the root of the service):
+
+- [GloveModel](https://drive.google.com/file/d/1E-jkanZQSjXAuwx3EXyGKAyMQ8QBWobA/view?usp=sharing)
+
Then it is necessary to configure the DKPRO_HOME variable with the resources directory path:
- export DKPRO_HOME=/path/dependency-detection/src/main/resources
diff --git a/gloveModel/.gitignore b/gloveModel/.gitignore
new file mode 100644
index 0000000..5e7d273
--- /dev/null
+++ b/gloveModel/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
diff --git a/libs/linguistic/rake/1.0/LICENSE.txt b/libs/linguistic/rake/1.0/LICENSE.txt
new file mode 100644
index 0000000..75dd38b
--- /dev/null
+++ b/libs/linguistic/rake/1.0/LICENSE.txt
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Linguistic
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/libs/linguistic/rake/1.0/rake-1.0.jar b/libs/linguistic/rake/1.0/rake-1.0.jar
new file mode 100644
index 0000000..9c1f64b
Binary files /dev/null and b/libs/linguistic/rake/1.0/rake-1.0.jar differ
diff --git a/libs/linguistic/rake/1.0/rake-1.0.pom.xml b/libs/linguistic/rake/1.0/rake-1.0.pom.xml
new file mode 100644
index 0000000..66a3a9a
--- /dev/null
+++ b/libs/linguistic/rake/1.0/rake-1.0.pom.xml
@@ -0,0 +1,10 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+	<modelVersion>4.0.0</modelVersion>
+	<groupId>linguistic</groupId>
+	<artifactId>rake</artifactId>
+	<version>1.0</version>
+	<description>POM was created from install:install-file</description>
+</project>
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index ed9c673..3104b88 100644
--- a/pom.xml
+++ b/pom.xml
@@ -53,6 +53,33 @@
 			<artifactId>spring-boot-starter-log4j2</artifactId>
 		</dependency>
+		<dependency>
+			<groupId>org.apache.lucene</groupId>
+			<artifactId>lucene-analyzers-common</artifactId>
+			<version>7.7.1</version>
+		</dependency>
+		<dependency>
+			<groupId>org.apache.lucene</groupId>
+			<artifactId>lucene-core</artifactId>
+			<version>7.7.1</version>
+		</dependency>
+		<dependency>
+			<groupId>linguistic</groupId>
+			<artifactId>rake</artifactId>
+			<version>1.0</version>
+			<scope>system</scope>
+			<systemPath>${project.basedir}/libs/linguistic/rake/1.0/rake-1.0.jar</systemPath>
+		</dependency>
+
+		<dependency>
+			<groupId>de.jungblut.glove</groupId>
+			<artifactId>glove</artifactId>
+			<version>0.3</version>
+		</dependency>
+
diff --git a/src/main/java/com/gessi/dependency_detection/WordEmbedding.java b/src/main/java/com/gessi/dependency_detection/WordEmbedding.java
new file mode 100644
index 0000000..aee70f1
--- /dev/null
+++ b/src/main/java/com/gessi/dependency_detection/WordEmbedding.java
@@ -0,0 +1,56 @@
+package com.gessi.dependency_detection;
+
+import de.jungblut.glove.GloveRandomAccessReader;
+import de.jungblut.glove.impl.GloveBinaryRandomAccessReader;
+import de.jungblut.math.DoubleVector;
+
+import java.io.IOException;
+import java.nio.file.Paths;
+
+import static java.lang.Math.sqrt;
+
+public class WordEmbedding {
+
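+    // Random-access reader over the binary GloVe model that the README instructs
+    // users to extract into the gloveModel/ folder at the service root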
+ GloveRandomAccessReader db = new GloveBinaryRandomAccessReader(Paths.get("gloveModel"));
+
+ public WordEmbedding() throws IOException {
+ }
+
+
+ /**
+ * Computes the cosine similarity between two words, if these vectors exist in the underlying Glove model
+ * @param a first word
+ * @param b second word
+     * @return The cosine similarity between the two words, or -1.0 if either word is missing from the model
+ */
+ public Double computeSimilarity(String a, String b) throws IOException {
+ DoubleVector help1 = null, help2 = null;
+ if (db.contains(a)) help1 = db.get(a);
+ if (db.contains(b)) help2 = db.get(b);
+ if (help1 != null && help2 != null) {
+ return cosineSimilarity(help1,help2);
+ } else return -1.0;
+ }
+
+
+    private Double cosineSimilarity(DoubleVector help1, DoubleVector help2) {
+        double[] one = help1.toArray();
+        double[] two = help2.toArray();
+        // GloVe vectors share a single dimensionality; iterating over the shorter
+        // array avoids an out-of-bounds access if the lengths ever differ
+        int length = Math.min(one.length, two.length);
+        double dot = 0.0, normA = 0.0, normB = 0.0;
+        for (int i = 0; i < length; i++) {
+            dot += one[i] * two[i];
+            normA += one[i] * one[i];
+            normB += two[i] * two[i];
+        }
+        return dot / (sqrt(normA) * sqrt(normB));
+    }
+}

 			return new ResponseEntity<>(createException(e.toString(),"NLP Error"), HttpStatus.INTERNAL_SERVER_ERROR);
} catch (SimilarityException | LexicalSemanticResourceException e) {
return new ResponseEntity<>(createException(e.toString(),"Similarity Error"), HttpStatus.INTERNAL_SERVER_ERROR);
+ } catch (ExecutionException e) {
+ e.printStackTrace();
}
return new ResponseEntity<>(onjN, HttpStatus.OK);
}
diff --git a/src/main/java/com/gessi/dependency_detection/domain/KeywordTool.java b/src/main/java/com/gessi/dependency_detection/domain/KeywordTool.java
new file mode 100644
index 0000000..341083e
--- /dev/null
+++ b/src/main/java/com/gessi/dependency_detection/domain/KeywordTool.java
@@ -0,0 +1,8 @@
+package com.gessi.dependency_detection.domain;
+
+public enum KeywordTool {
+
+ RULE_BASED,
+ TFIDF_BASED
+
+}
diff --git a/src/main/java/com/gessi/dependency_detection/domain/Requirement.java b/src/main/java/com/gessi/dependency_detection/domain/Requirement.java
new file mode 100644
index 0000000..68d1f8f
--- /dev/null
+++ b/src/main/java/com/gessi/dependency_detection/domain/Requirement.java
@@ -0,0 +1,27 @@
+package com.gessi.dependency_detection.domain;
+
+public class Requirement {
+ String description;
+ String id;
+
+    public Requirement(String id, String description) {
+        this.description = description;
+        this.id = id;
+    }
+
+ public String getDescription() {
+ return description;
+ }
+
+ public void setDescription(String description) {
+ this.description = description;
+ }
+
+ public String getId() {
+ return id;
+ }
+
+ public void setId(String id) {
+ this.id = id;
+ }
+}
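
A minimal sketch of how the new pieces are meant to fit together (the wiring below is hypothetical and not part of this diff; `handler`, `requirements`, and the 0.7 threshold are placeholders):

    WordEmbedding glove = new WordEmbedding();                      // reads gloveModel/
    NLPAnalyser analyser = new NLPAnalyser();
    Map<String, String> keywords = analyser.prepareRequirements(requirements);
    for (Map.Entry<String, String> entry : keywords.entrySet()) {
        // each requirement's keywords are matched against ontology classes,
        // with GloVe-based synonymy above the chosen threshold
        handler.matching(entry.getValue(), entry.getKey(),
                requirements.get(entry.getKey()), true, 0.7, glove);
    }
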
diff --git a/src/main/java/com/gessi/dependency_detection/functionalities/NLPAnalyser.java b/src/main/java/com/gessi/dependency_detection/functionalities/NLPAnalyser.java
index f64e6fc..8165a7d 100644
--- a/src/main/java/com/gessi/dependency_detection/functionalities/NLPAnalyser.java
+++ b/src/main/java/com/gessi/dependency_detection/functionalities/NLPAnalyser.java
@@ -1,22 +1,22 @@
package com.gessi.dependency_detection.functionalities;
-import java.io.IOException;
-import java.io.InputStream;
import java.io.BufferedReader;
+import java.io.IOException;
import java.io.FileReader;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
+import java.io.InputStream;
+import java.util.*;
+import java.util.concurrent.ExecutionException;
+import com.gessi.dependency_detection.components.Node;
import com.gessi.dependency_detection.util.Control;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
+import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
+import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.Dependency;
+import de.tudarmstadt.ukp.dkpro.core.clearnlp.ClearNlpLemmatizer;
+import de.tudarmstadt.ukp.dkpro.core.clearnlp.ClearNlpParser;
+import de.tudarmstadt.ukp.dkpro.core.clearnlp.ClearNlpPosTagger;
import dkpro.similarity.algorithms.api.SimilarityException;
-import org.apache.uima.UIMAException;
-import org.apache.uima.analysis_engine.AnalysisEngine;
-import org.apache.uima.analysis_engine.AnalysisEngineDescription;
-import org.apache.uima.fit.testing.factory.TokenBuilder;
-import org.apache.uima.fit.util.JCasUtil;
-import org.apache.uima.jcas.JCas;
-import org.apache.uima.resource.ResourceInitializationException;
import de.tudarmstadt.ukp.dkpro.lexsemresource.LexicalSemanticResource;
import de.tudarmstadt.ukp.dkpro.lexsemresource.core.ResourceFactory;
@@ -24,27 +24,23 @@
import de.tudarmstadt.ukp.dkpro.lexsemresource.exception.ResourceLoaderException;
import dkpro.similarity.algorithms.lsr.LexSemResourceComparator;
import dkpro.similarity.algorithms.lsr.path.WuPalmerComparator;
-
-import org.springframework.core.io.ClassPathResource;
-
-import com.gessi.dependency_detection.components.Node;
-
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
-import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
-
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Lemma;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Sentence;
-import de.tudarmstadt.ukp.dkpro.core.api.segmentation.type.Token;
-import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.dependency.*;
-
-import de.tudarmstadt.ukp.dkpro.core.clearnlp.ClearNlpLemmatizer;
-import de.tudarmstadt.ukp.dkpro.core.clearnlp.ClearNlpParser;
-import de.tudarmstadt.ukp.dkpro.core.clearnlp.ClearNlpPosTagger;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
+import org.apache.uima.UIMAException;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.analysis_engine.AnalysisEngineDescription;
+import org.apache.uima.fit.testing.factory.TokenBuilder;
+import org.apache.uima.fit.util.JCasUtil;
+import org.apache.uima.jcas.JCas;
+import org.apache.uima.resource.ResourceInitializationException;
+import org.springframework.core.io.ClassPathResource;
+
+import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngine;
+import static org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
+
public class NLPAnalyser {
@@ -67,6 +63,7 @@ public class NLPAnalyser {
public NLPAnalyser() {
super();
try {
+ System.out.println("Loading");
wordnet = ResourceFactory.getInstance().get("wordnet", "en");
wordnet.setIsCaseSensitive(false);
} catch (ResourceLoaderException e) {
@@ -75,15 +72,40 @@ public NLPAnalyser() {
}
}
-
/**
- * The approach of dependency detection
- * @param requirement
+ * Semantic similarity engine (DKPRO & WordNet)
+ * @param term1
+ * @param term2
* @return
- * @throws IOException
- * @throws ResourceInitializationException
- * @throws UIMAException
+ * @throws SimilarityException
+ * @throws LexicalSemanticResourceException
*/
+ public double semanticSimilarity(String term1, String term2)
+ throws SimilarityException, LexicalSemanticResourceException {
+
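+        // Build the Wu-Palmer comparator over WordNet lazily, on first use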
+ if (comparatorWN == null)
+ comparatorWN = new WuPalmerComparator(wordnet, wordnet.getRoot());
+ return comparatorWN.getSimilarity(term1, term2);
+
+ }
+
+    public Map<String, String> prepareRequirements(Map<String, String> requirements) throws InterruptedException, ExecutionException, IOException {
+        List<Requirement> recs = new ArrayList<>();
+        for (String s : requirements.keySet()) {
+            recs.add(new com.gessi.dependency_detection.domain.Requirement(s, requirements.get(s)));
+        }
+        Map<String, String> keywords;
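+        // Heuristic introduced by this diff: with more than 100 requirements, keywords
+        // come from corpus-level TF-IDF; smaller sets use per-requirement RAKE instead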
+ if (requirements.keySet().size()>100) {
+ TFIDFKeywordExtractor extractor=new TFIDFKeywordExtractor();
+ keywords=extractor.computeTFIDF(recs);
+ }
+ else {
+ RAKEKeywordExtractor extractor=new RAKEKeywordExtractor();
+ keywords=extractor.computeRake(recs);
+ }
+ return keywords;
+ }
+
+    public List<Node> requirementAnalysis(String requirement)
throws IOException, ResourceInitializationException, UIMAException {
@@ -126,6 +148,31 @@ public List<Node> requirementAnalysis(String requirement)
return synResult;
}
+ /**
+ * Debug
+ * Utility to read a file
+ * @param path
+ * @return
+ */
+    public List<String> readFile(String path) {
+        ArrayList<String> fileLines = new ArrayList<>();
+
+ try(FileReader fr = new FileReader(path);
+ BufferedReader br = new BufferedReader(fr)) {
+
+ String sCurrentLine;
+
+ while ((sCurrentLine = br.readLine()) != null) {
+ fileLines.add(sCurrentLine);
+ }
+
+ } catch (IOException e) {
+ Control.getInstance().showErrorMessage(e.getMessage());
+ }
+ return fileLines;
+ }
+//--------------------------------------------------------------RULE BASED
+
/**
* Noisy text cleaning
* Rule-based method
@@ -159,7 +206,7 @@ private String clearSentence(String sentence) {
// split the phrase correctly
sentence = sentence.replaceAll("\\.(\\s)", " . ");
sentence = sentence.replaceAll("\\s+", " ");
-
+
// Check the endpoint of the sentence
if (sentence.length() > 1) {
if (sentence.substring(sentence.length() - 1).equals(";")
@@ -173,7 +220,7 @@ private String clearSentence(String sentence) {
}
return sentence;
}
-
+
/**
* Tokenization (OpenNLP)
 * @param requirement
@@ -245,45 +292,6 @@ public static JCas runParser(AnalysisEngine aEngine, String aLanguage, String aT
return jcas;
}
-
- /**
- * Lemmatization engine (clearNLP)
- * @param term
- * @return
- * @throws UIMAException
- */
- public String lemmatization(String term) throws UIMAException {
- if (lemmaEngine == null && tagger == null && lemma == null) {
-
- tagger = createEngineDescription(ClearNlpPosTagger.class);
- lemma = createEngineDescription(ClearNlpLemmatizer.class);
-
- lemmaEngine = createEngine(createEngineDescription(tagger, lemma));
- }
- JCas jcas = runParser(lemmaEngine, "en", term);
- Collection<Lemma> lemmas = JCasUtil.select(jcas, Lemma.class);
- String ret = "";
- String[] terms = term.split(" ");
- int i = 0;
- if (!lemmas.isEmpty()) {
- for (Lemma l : lemmas) {
- if (!l.getValue().matches("\\d+")) {
- if (!ret.equals(""))
- ret = ret.concat(" " + l.getValue());
- else
- ret = l.getValue();
- } else {
- if (!ret.equals(""))
- ret = ret.concat(" " + terms[i]);
- else
- ret = terms[i];
- }
- i++;
- }
- }
- return ret;
- }
-
/**
* Dependency parser engine (clearNLP)
* This function generates a dependency tree from the dependency parser results.
@@ -314,7 +322,6 @@ public Node dependencyParser(String aText) throws ResourceInitializationExceptio
}
return root;
}
-
/**
* Update the tree information
* @param tree
@@ -336,7 +343,6 @@ private Node fillTreeLinks(ArrayList<Node> tree) {
}
return root;
}
-
/**
* Find the parent of the node from the dependncy parser results
* @param tree
@@ -360,43 +366,47 @@ private int findParent(ArrayList<Node> tree, int parentId, int idx, boolean next
}
/**
- * Semantic similarity engine (DKPRO & WordNet)
- * @param term1
- * @param term2
+ * Lemmatization engine (clearNLP)
+ * @param term
* @return
- * @throws SimilarityException
- * @throws LexicalSemanticResourceException
+ * @throws UIMAException
*/
- public double semanticSimilarity(String term1, String term2)
- throws SimilarityException, LexicalSemanticResourceException {
+ public String lemmatization(String term) throws UIMAException {
+ if (lemmaEngine == null && tagger == null && lemma == null) {
- if (comparatorWN == null)
- comparatorWN = new WuPalmerComparator(wordnet, wordnet.getRoot());
- return comparatorWN.getSimilarity(term1, term2);
+ tagger = createEngineDescription(ClearNlpPosTagger.class);
+ lemma = createEngineDescription(ClearNlpLemmatizer.class);
+ lemmaEngine = createEngine(createEngineDescription(tagger, lemma));
+ }
+ JCas jcas = runParser(lemmaEngine, "en", term);
+        Collection<Lemma> lemmas = JCasUtil.select(jcas, Lemma.class);
+ String ret = "";
+ String[] terms = term.split(" ");
+ int i = 0;
+ if (!lemmas.isEmpty()) {
+ for (Lemma l : lemmas) {
+ if (!l.getValue().matches("\\d+")) {
+ if (!ret.equals(""))
+ ret = ret.concat(" " + l.getValue());
+ else
+ ret = l.getValue();
+ } else {
+ if (!ret.equals(""))
+ ret = ret.concat(" " + terms[i]);
+ else
+ ret = terms[i];
+ }
+ i++;
+ }
+ }
+ return ret;
}
- /**
- * Debug
- * Utility to read a file
- * @param path
- * @return
- */
- public List<String> readFile(String path) {
- ArrayList<String> fileLines = new ArrayList<>();
- try(FileReader fr = new FileReader(path);
- BufferedReader br = new BufferedReader(fr)) {
- String sCurrentLine;
- while ((sCurrentLine = br.readLine()) != null) {
- fileLines.add(sCurrentLine);
- }
- } catch (IOException e) {
- Control.getInstance().showErrorMessage(e.getMessage());
- }
- return fileLines;
- }
+
+
}
\ No newline at end of file
diff --git a/src/main/java/com/gessi/dependency_detection/functionalities/OntologyHandler.java b/src/main/java/com/gessi/dependency_detection/functionalities/OntologyHandler.java
index 411bf4b..cfd0f39 100644
--- a/src/main/java/com/gessi/dependency_detection/functionalities/OntologyHandler.java
+++ b/src/main/java/com/gessi/dependency_detection/functionalities/OntologyHandler.java
@@ -3,12 +3,14 @@
import java.io.IOException;
import java.util.*;
+import com.gessi.dependency_detection.WordEmbedding;
+import com.gessi.dependency_detection.components.Node;
+import de.tudarmstadt.ukp.dkpro.core.api.syntax.type.constituent.S;
import org.apache.log4j.varia.NullAppender;
import org.apache.uima.UIMAException;
import com.gessi.dependency_detection.components.Dependency;
import com.gessi.dependency_detection.components.DependencyType;
-import com.gessi.dependency_detection.components.Node;
import com.gessi.dependency_detection.components.Status;
import com.hp.hpl.jena.ontology.DatatypeProperty;
import com.hp.hpl.jena.ontology.Individual;
@@ -57,14 +59,15 @@ public void loadOnt(String source, String path) throws IOException {
/**
 * Analyse ontology classes and extract their information (terms, lemmas)
*
- * @param analizer
* @throws IOException
* @throws UIMAException
+ * @return
*/
- public void searchClasses(NLPAnalyser analizer) throws IOException, UIMAException {
+ public int searchClassesTfIdfBased() throws IOException, UIMAException {
ontClasses = new ArrayList<>();
classesWords = new ArrayList<>();
classesLemmas = new ArrayList<>();
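+        // Longest class name, in words; presumably used by callers to bound the
+        // n-gram length considered during matching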
+ int max=1;
 ExtendedIterator<OntClass> rootClasses = this.model.listClasses();
while (rootClasses.hasNext()) {
OntClass thisClass = (OntClass) rootClasses.next();
@@ -82,10 +85,10 @@ public void searchClasses(NLPAnalyser analizer) throws IOException, UIMAExceptio
ontTerm = words[i];
}
}
- String[] lemmas = extractLemmas(ontTerm, analizer);
-
+ String[] lemmas = extractLemmas(ontTerm);
ontClasses.add(thisClass);
classesWords.add(words);
+ if (words.length>max) max=words.length;
classesLemmas.add(lemmas);
for (int i = 0; i < lemmas.length; i++) {
synonyms.put(lemmas[i], new ArrayList<>());
@@ -93,204 +96,151 @@ public void searchClasses(NLPAnalyser analizer) throws IOException, UIMAExceptio
}
}
}
+ return max;
}
/**
- * Check the similarity between two terms
- *
- * @param reqTerm
- * @param ontLemma
+     * Analyse ontology classes and extract their information (terms, lemmas)
+ *
* @param analizer
- * @param thr
- * @return
- * @throws SimilarityException
- * @throws LexicalSemanticResourceException
+ * @throws IOException
+ * @throws UIMAException
*/
- private boolean isSynonym(String reqTerm, String ontLemma, NLPAnalyser analizer, double thr)
- throws SimilarityException, LexicalSemanticResourceException {
- if (!ontLemma.matches("\\d+|\\W+")) {
- if (!synonyms.get(ontLemma).contains(reqTerm) && !noSynonyms.get(ontLemma).contains(reqTerm)) {
- if (analizer.semanticSimilarity(reqTerm, ontLemma) >= thr) {
- synonyms.get(ontLemma).add(reqTerm);
+ public void searchClasses(NLPAnalyser analizer) throws IOException, UIMAException {
+ ontClasses = new ArrayList<>();
+ classesWords = new ArrayList<>();
+ classesLemmas = new ArrayList<>();
+        ExtendedIterator<OntClass> rootClasses = this.model.listClasses();
+ while (rootClasses.hasNext()) {
+ OntClass thisClass = (OntClass) rootClasses.next();
- return true;
- } else {
- noSynonyms.get(ontLemma).add(reqTerm);
+ if (thisClass.getLocalName() != null) {
+ String ontTerm = "";
+ String[] words = thisClass.getLocalName()
+ .split("_|\\s|(?());
+ noSynonyms.put(lemmas[i], new ArrayList<>());
}
- } else if (synonyms.get(ontLemma).contains(reqTerm)) {
- return true;
- } else if (noSynonyms.get(ontLemma).contains(reqTerm)) {
- return false;
}
}
- return false;
}
- /**
- * Check if the req. term match with the term of the ontology
- *
- * @param term
- * @param lemma
- * @param ontWord
- * @param ontLemma
- * @return
- * @throws SimilarityException
- * @throws LexicalSemanticResourceException
- */
- private boolean isSameTerm(String term, String lemma, String ontWord, String ontLemma)
- throws SimilarityException, LexicalSemanticResourceException {
- if (term.equalsIgnoreCase(ontWord))
- return true;
- if (lemma.equals(ontWord))
- return true;
- if (lemma.equals(ontLemma))
- return true;
- if (term.equalsIgnoreCase(ontLemma))
- return true;
+ private String[] extractLemmas(String ontTerm) throws IOException {
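+        // In the TF-IDF path, class-name lemmas come from text preprocessing plus RAKE
+        // analysis instead of the DKPro lemmatizer kept for the rule-based path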
+ TextPreprocessing textPreprocessing=new TextPreprocessing();
+ String l=textPreprocessing.text_preprocess(ontTerm);
+ RAKEKeywordExtractor rake=new RAKEKeywordExtractor();
+        List<String> resAnalysis = rake.RAKEanalyzeNoStopword(l);
+ String[] res=new String[resAnalysis.size()];
+ return resAnalysis.toArray(res);
+ }
+ private String[] extractLemmas(String words, NLPAnalyser analizer) throws IOException, UIMAException {
+ String ontLemma = analizer.lemmatization(words);
+ return ontLemma.split(" ");
+ }
+
- if (term.toLowerCase().matches(ontWord + "s|es"))
- return true;
- if (lemma.matches(ontWord + "s|es"))
- return true;
- if (lemma.matches(ontLemma + "s|es"))
- return true;
- if (term.toLowerCase().matches(ontLemma + "s|es"))
- return true;
- return false;
- }
/**
- * check if a ordered set of words is the same of the set of words of the
- * ontology
+ * Find if the set of words contains a correct n-gram that match with the
+ * ontology.
*
- * @param ngramTerm
- * @param ngramLemma
- * @param words
+ * @param node
* @param lemmas
- * @param analizer
* @param syny
- * @param thr
* @return
* @throws SimilarityException
* @throws LexicalSemanticResourceException
*/
- private boolean isSameNgram(Stack<String> ngramTerm, Stack<String> ngramLemma, String[] words, String[] lemmas,
- NLPAnalyser analizer, boolean syny, double thr)
- throws SimilarityException, LexicalSemanticResourceException {
- boolean find = false;
- ArrayList<Integer> idxOntLemmaAnalized = new ArrayList<>();
- ArrayList<Integer> idxReqLemmaAnalized = new ArrayList<>();
- for (int i = 0; i < ngramTerm.size(); i++) {
- if (!find && i > 0) {
- return false;
- }
- find = false;
- int j = 0;
- while (j < words.length && !find) {
- if (!idxOntLemmaAnalized.contains(j)
- && isSameTerm(ngramTerm.get(i), ngramLemma.get(i), words[j], lemmas[j])) {
- find = true;
- idxReqLemmaAnalized.add(i);
- idxOntLemmaAnalized.add(j);
- }
- j++;
- }
+    private boolean extractNGram(String node, String[] lemmas, boolean syny, double thr, WordEmbedding wordEmbedding) throws SimilarityException, LexicalSemanticResourceException, IOException {
+        String[] lemmasNode = node.split(" ");
+        Set<String> nodeSet = new HashSet<>(Arrays.asList(lemmasNode));
+        Set<String> lemmaSet = new HashSet<>(Arrays.asList(lemmas));
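+        // Exact mode: the requirement's keyword set must contain every ontology lemma;
+        // synonym mode (syny) instead accepts GloVe-similarity matches above thr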
+ if (syny) {
+ return isSynonym(nodeSet,lemmaSet,thr,wordEmbedding);
}
-
- // of it is not detected, check the synonymy
- if (!find && syny) {
-
- for (int i = 0; i < ngramLemma.size(); i++) {
- if (!idxReqLemmaAnalized.contains(i)) {
- if (!find && i > 0) {
- return false;
- }
- find = false;
- int j = 0;
- while (j < lemmas.length && !find) {
- if (!idxOntLemmaAnalized.contains(j)
- && isSynonym(ngramLemma.get(i), lemmas[j], analizer, thr)) {
- find = true;
- idxOntLemmaAnalized.add(j);
- }
- j++;
- }
- } else find = true;
- }
+ else {
+ return nodeSet.containsAll(lemmaSet);
}
- return find;
}
- /**
- * Find all the combinations of the n-gram to check if the req. concept matches
- * with the ont. concept
- *
- * @param idx
- * @param level
- * @param n
- * @param termsNode
- * @param lemmasNode
- * @param ngramTerm
- * @param ngramLemma
- * @param words
- * @param lemmas
- * @param analizer
- * @param syny
- * @param thr
- * @return
- * @throws SimilarityException
- * @throws LexicalSemanticResourceException
- */
- private boolean findPotentialNgram(int idx, int level, int n, String[] termsNode, String[] lemmasNode,
- Stack ngramTerm, Stack ngramLemma, String[] words, String[] lemmas, NLPAnalyser analizer,
- boolean syny, double thr) throws SimilarityException, LexicalSemanticResourceException {
- boolean find = false;
- for (int j = idx; j < termsNode.length && !find; j++) {
- ngramTerm.push(termsNode[j]);
- ngramLemma.push(lemmasNode[j]);
- if (level < n) {
- find = findPotentialNgram(j + 1, level + 1, n, termsNode, lemmasNode, ngramTerm, ngramLemma, words,
- lemmas, analizer, syny, thr);
+    private boolean isSynonym(Set<String> requirementLemmas, Set<String> ontologyLemmas, double thr, WordEmbedding wordEmbedding) throws IOException {
+ boolean isSynonym=true;
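+        // Every ontology lemma needs at least one requirement lemma whose embedding
+        // similarity reaches the threshold; one uncovered lemma fails the whole match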
+ for (String s: ontologyLemmas) {
+ boolean synonymExists=false;
+ for (String l:requirementLemmas) {
+ if (wordEmbedding.computeSimilarity(s,l)>=thr) {
+ synonymExists=true;
+ break;
+ }
}
- if (level == n && isSameNgram(ngramTerm, ngramLemma, words, lemmas, analizer, syny, thr)) return true;
- ngramTerm.pop();
- ngramLemma.pop();
+ isSynonym=isSynonym&&synonymExists;
+ if (!isSynonym) return false;
}
- return find;
+ return true;
}
/**
- * Find if the set of words contains a correct n-gram that match with the
- * ontology.
+ * Analyze the potential term candidates extracted from the requirements (n-gram
+ * concepts), and store the requirement within the related ontology class if
+     * they match a concept of the ontology.
*
- * @param node
- * @param words
- * @param lemmas
- * @param analizer
+ * @param keywords
+ * @param reqId
+ * @param requirement
* @param syny
- * @param thr
- * @return
+ * @throws IOException
* @throws SimilarityException
* @throws LexicalSemanticResourceException
*/
- private boolean extractNGram(Node node, String[] words, String[] lemmas, NLPAnalyser analizer, boolean syny,
- double thr) throws SimilarityException, LexicalSemanticResourceException {
- String[] termsNode = node.getTerm().split(" ");
- String[] lemmasNode = node.getLemma().split(" ");
- int n = words.length;
- Stack ngramTerm = new Stack<>();
- Stack ngramLemma = new Stack<>();
+    public void matching(String keywords, String reqId, String requirement, boolean syny, double thr, WordEmbedding wordEmbedding) throws IOException, SimilarityException, LexicalSemanticResourceException {
+        ArrayList<OntClass> classes = new ArrayList<>();
+ String[] lemmas;
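+        // A class matches when the requirement has at least as many keywords as the
+        // class has lemmas and those keywords cover the lemmas (see extractNGram)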
+ for (int j = 0; j < ontClasses.size(); j++) {
+ lemmas = classesLemmas.get(j);
+ if (keywords.split(" ").length >= lemmas.length && extractNGram(keywords, lemmas, syny,thr,wordEmbedding)) {
+ System.out.println("Requirement " + reqId + " contains class " + String.join(" ", lemmas));
+ //System.out.println("REQUIREMENT KEYWORDS: "+keywords);
+ //System.out.println("ONTOLOGY NAME: "+lemmas.toString());
+
+ classes.add(ontClasses.get(j));
+ }
+ }
- return findPotentialNgram(0, 1, n, termsNode, lemmasNode, ngramTerm, ngramLemma, words, lemmas, analizer, syny, thr);
+ // Requirement instantiation within the ontology
+ for (OntClass cls : classes) {
+ //System.out.println("A MATCH WAS MADE");
+ Individual individual = this.model.createIndividual(this.source + ":" + reqId + "_" + cls.getLocalName(),
+ cls);
+ DatatypeProperty req = this.model.getDatatypeProperty(this.source + "#requirement");
+ individual.setPropertyValue(req, this.model.createTypedLiteral(requirement));
+ DatatypeProperty id = this.model.getDatatypeProperty(this.source + "#id");
+ individual.setPropertyValue(id, this.model.createTypedLiteral(reqId));
+ DatatypeProperty className = this.model.getDatatypeProperty(this.source + "#class");
+ individual.setPropertyValue(className, this.model.createTypedLiteral(cls.getLocalName()));
+ }
}
-
/**
* Analyze the potential term candidates extracted from the requirements (n-gram
* concepts), and store the requirement within the related ontology class if
 * they match a concept of the ontology.
- *
+ *
* @param topNodes
* @param reqId
* @param requirement
@@ -301,7 +251,8 @@ private boolean extractNGram(Node node, String[] words, String[] lemmas, NLPAnal
* @throws SimilarityException
* @throws LexicalSemanticResourceException
*/
- public void matching(List<Node> topNodes, String reqId, String requirement, NLPAnalyser analizer, boolean syny,
+
+    public void matchingRuleBased(List<Node> topNodes, String reqId, String requirement, NLPAnalyser analizer, boolean syny,
double thr) throws IOException, SimilarityException, LexicalSemanticResourceException {
 ArrayList<OntClass> classes = new ArrayList<>();
String[] words;
@@ -310,7 +261,7 @@ public void matching(List topNodes, String reqId, String requirement, NLPA
for (int j = 0; j < ontClasses.size(); j++) {
words = classesWords.get(j);
lemmas = classesLemmas.get(j);
- if (topNodes.get(i).getTerm().split(" ").length >= words.length && extractNGram(topNodes.get(i), words, lemmas, analizer, syny, thr)) classes.add(ontClasses.get(j));
+ if (topNodes.get(i).getTerm().split(" ").length >= words.length && extractNGramRuleBased(topNodes.get(i), words, lemmas, analizer, syny, thr)) classes.add(ontClasses.get(j));
}
}
@@ -327,21 +278,9 @@ public void matching(List topNodes, String reqId, String requirement, NLPA
}
}
- /**
- * Extract lemmas from ontology classes
- * @param words
- * @param analizer
- * @return
- * @throws IOException
- * @throws UIMAException
- */
- private String[] extractLemmas(String words, NLPAnalyser analizer) throws IOException, UIMAException {
- String ontLemma = analizer.lemmatization(words);
- return ontLemma.split(" ");
- }
/**
- * Analyze the ontology and extract dependncies
+ * Analyze the ontology and extract dependencies
* @return
*/
 public List<Dependency> ontConflictDetection() {
@@ -368,7 +307,7 @@ public List ontConflictDetection() {
if (!f.equals(t)) {
Dependency newDep = new Dependency(f, t, Status.PROPOSED,
DependencyType.valueOf((String) dep.get(0).toString().toUpperCase()));
- dependencies.add(newDep);
+ if (!dependencies.contains(newDep)) dependencies.add(newDep);
}
}
}
@@ -427,5 +366,181 @@ private List