Skip to content

Commit

Permalink
Merge pull request #3 from OpenReqEU/DeveloperAntoni
Browse files Browse the repository at this point in the history
TFIDF feature extraction branch
  • Loading branch information
quim-motger authored Dec 4, 2019
2 parents 486eec7 + 7ddc776 commit 6cb10e4
Show file tree
Hide file tree
Showing 19 changed files with 2,062 additions and 301 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ First of all it is necessary to download the following external files and depend
- [WordNet ESA](https://docs.google.com/uc?export=download&id=1I6oQqIeZva1CwLA96OkHFSZKiBfUgWLe)
- [WordNet LexSemResources](https://docs.google.com/uc?export=download&id=1TeYlsHbcCtxbsVVoBvttdVsvbKFHPbZn)

Second of all, it is necessary to download the following file and extract its content into the gloveModel/ folder (at the root of the service):

- [GloveModel](https://drive.google.com/file/d/1E-jkanZQSjXAuwx3EXyGKAyMQ8QBWobA/view?usp=sharing)

Then it is necessary to configure the DKPRO_HOME variable with the resources directory path:

- export DKPRO_HOME=/path/dependency-detection/src/main/resources
Expand Down
4 changes: 4 additions & 0 deletions gloveModel/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
21 changes: 21 additions & 0 deletions libs/linguistic/rake/1.0/LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2018 Linguistic

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
Binary file added libs/linguistic/rake/1.0/rake-1.0.jar
Binary file not shown.
10 changes: 10 additions & 0 deletions libs/linguistic/rake/1.0/rake-1.0.pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"
xmlns="http://maven.apache.org/POM/4.0.0">
<modelVersion>4.0.0</modelVersion>
<groupId>linguistic</groupId>
<artifactId>rake</artifactId>
<version>1.0</version>
<description>POM was created from install:install-file</description>
</project>
27 changes: 27 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,33 @@
<artifactId>spring-boot-starter-log4j2</artifactId>
</dependency>

<!-- Keyword analysis -->
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analyzers-common</artifactId>
<version>7.7.1</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>7.7.1</version>
</dependency>
<dependency>
<groupId>linguistic</groupId>
<artifactId>rake</artifactId>
<version>1.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/libs/linguistic/rake/1.0/rake-1.0.jar</systemPath>
</dependency>


<!--Word embedding for similarity -->
<dependency>
<groupId>de.jungblut.glove</groupId>
<artifactId>glove</artifactId>
<version>0.3</version>
</dependency>


<!-- API dependencies (Spring boot) -->
<dependency>
Expand Down
56 changes: 56 additions & 0 deletions src/main/java/com/gessi/dependency_detection/WordEmbedding.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.gessi.dependency_detection;

import de.jungblut.glove.GloveRandomAccessReader;
import de.jungblut.glove.impl.GloveBinaryRandomAccessReader;
import de.jungblut.math.DoubleVector;

import java.io.IOException;
import java.nio.file.Paths;

import static java.lang.Math.sqrt;

/**
 * Computes word-to-word similarity from GloVe word vectors.
 *
 * <p>The vectors are read through a {@link GloveBinaryRandomAccessReader} opened on the
 * {@code gloveModel} path, resolved relative to the process working directory.
 */
public class WordEmbedding {

    /** Random-access reader over the GloVe model located at {@code gloveModel}. */
    GloveRandomAccessReader db = new GloveBinaryRandomAccessReader(Paths.get("gloveModel"));

    /**
     * Creates the helper; the model reader itself is created by the field initializer.
     *
     * @throws IOException propagated from the creation of the model reader
     */
    public WordEmbedding() throws IOException {
    }


    /**
     * Computes the cosine similarity between two words, if these vectors exist in the underlying Glove model
     * @param a first word
     * @param b second word
     * @return The cosine similarity between the two words, or {@code -1.0} when at least one of
     *         the words has no vector in the model
     * @throws IOException if the underlying model reader fails
     */
    public Double computeSimilarity(String a, String b) throws IOException {
        // Both words must be known to the model; stop early instead of looking up the
        // second word when the first one is already missing.
        if (!db.contains(a) || !db.contains(b)) {
            return -1.0;
        }
        DoubleVector first = db.get(a);
        DoubleVector second = db.get(b);
        if (first == null || second == null) {
            return -1.0;
        }
        return cosineSimilarity(first, second);
    }


    /**
     * Cosine of the angle between two vectors: dot product divided by the product of the norms.
     *
     * <p>The dot product only runs over the components present in both arrays, so vectors of
     * different dimension no longer index past the end of the shorter one. If either vector has
     * a zero norm the division yields {@code NaN}.
     */
    private double cosineSimilarity(DoubleVector help1, DoubleVector help2) {
        double[] one = help1.toArray();
        double[] two = help2.toArray();
        // Bound by the shorter array: components missing from one side contribute nothing.
        int length = Math.min(one.length, two.length);
        double sum = 0.0;
        for (int i = 0; i < length; ++i) {
            sum += one[i] * two[i];
        }
        return sum / (norm(one) * norm(two));
    }

    /** Euclidean (L2) norm of the given array; primitives avoid boxing on every iteration. */
    private double norm(double[] array) {
        double tot = 0.0;
        for (double d : array) {
            tot += d * d;
        }
        return sqrt(tot);
    }

}

Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,23 @@ public DependencyType getDependencyType() {
return dependencyType;
}

/**
 * Two dependencies are equal when their {@code from}, {@code to} and dependency type are equal.
 *
 * <p>Components are compared null-safely, so a dependency with a {@code null} component is
 * compared instead of throwing a {@link NullPointerException}; this matches
 * {@link #hashCode()}, which already treats these fields as nullable.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is a {@code Dependency} with equal components
 */
@Override
public boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (!(o instanceof Dependency)) {
        return false;
    }
    Dependency dep = (Dependency) o;
    // Fully qualified so that no additional import is required in this file.
    return java.util.Objects.equals(dep.getFrom(), this.from)
            && java.util.Objects.equals(dep.getTo(), this.to)
            && java.util.Objects.equals(dep.getDependencyType(), this.dependencyType);
}

/**
 * Hash code built from {@code to}, {@code from} and {@code dependencyType}; a {@code null}
 * field contributes {@code 0}.
 *
 * @return the constant base {@code 17 * 7} plus the sum of the three field hashes
 */
@Override
public int hashCode() {
    int toHash = 0;
    if (this.to != null) {
        toHash = this.to.hashCode();
    }
    int fromHash = 0;
    if (this.from != null) {
        fromHash = this.from.hashCode();
    }
    int typeHash = 0;
    if (this.dependencyType != null) {
        typeHash = this.dependencyType.hashCode();
    }
    int result = 17 * 7;
    result += toHash;
    result += fromHash;
    result += typeHash;
    return result;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@

import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.concurrent.ExecutionException;

import javax.validation.Valid;
import javax.validation.constraints.NotNull;
import javax.ws.rs.QueryParam;

import com.gessi.dependency_detection.domain.KeywordTool;
import com.gessi.dependency_detection.util.Control;
import org.apache.uima.UIMAException;
import org.apache.uima.resource.ResourceInitializationException;
Expand Down Expand Up @@ -78,7 +80,9 @@ public ResponseEntity uploadJSONFile(
@ApiParam(value = "The JSON file to upload", required = true) @RequestPart("json") @Valid String json,
@ApiParam(value = "Id of the project where the requirements to analize are.", required = true) @PathVariable("projectId") String projectId,
@ApiParam(value = "If true, semantic similarity (synonymy) detection is applied to improve the detection algorithm.", required = true) @RequestParam(value = "synonymy", required = true) Boolean synonymy,
@ApiParam(value = "Threshold of semantic similarity to detect synonyms (included).", required = false) @RequestParam(value = "threshold", required = false) Double threshold)
@ApiParam(value = "Threshold of semantic similarity to detect synonyms (included).", required = false) @RequestParam(value = "threshold", required = false) Double threshold,
@ApiParam(value = "Keyword extraction tool (RULE_BASED or TFIDF_BASED)", required = false) @RequestParam(value = "keywordTool", required = false,
defaultValue = "RULE_BASED") KeywordTool keywordTool)
throws IOException, InterruptedException {
Control.getInstance().showInfoMessage("Start computing");
ObjectNode onjN = null;
Expand All @@ -101,7 +105,7 @@ public ResponseEntity uploadJSONFile(
// apply the dependency detection

onjN = depService.conflictDependencyDetection(projectId, synonymy,
threshold);
threshold, keywordTool);

/* Delete the uploaded file */
depService.deleteAll();
Expand All @@ -113,6 +117,8 @@ public ResponseEntity uploadJSONFile(
return new ResponseEntity<>(createException(e.toString(),"NLP Error"), HttpStatus.INTERNAL_SERVER_ERROR);
} catch (SimilarityException | LexicalSemanticResourceException e) {
return new ResponseEntity<>(createException(e.toString(),"Similarity Error"), HttpStatus.INTERNAL_SERVER_ERROR);
} catch (ExecutionException e) {
e.printStackTrace();
}
return new ResponseEntity<>(onjN, HttpStatus.OK);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.gessi.dependency_detection.domain;

/**
 * Selects which keyword extraction tool is used.
 */
public enum KeywordTool {
    /** Rule-based keyword extraction. */
    RULE_BASED,
    /** TF-IDF based keyword extraction. */
    TFIDF_BASED;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.gessi.dependency_detection.domain;

/**
 * Mutable holder pairing a requirement identifier with its description text.
 */
public class Requirement {
    /** Description text of the requirement. */
    String description;
    /** Identifier of the requirement. */
    String id;

    /**
     * Creates a requirement. Note the argument order: identifier first, description second.
     *
     * @param s  value stored as the identifier
     * @param s1 value stored as the description
     */
    public Requirement(String s, String s1) {
        this.id = s;
        this.description = s1;
    }

    /** @return the description text */
    public String getDescription() {
        return this.description;
    }

    /** @param description the new description text */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return the identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the new identifier */
    public void setId(String id) {
        this.id = id;
    }
}
Loading

0 comments on commit 6cb10e4

Please sign in to comment.