Skip to content

Commit

Permalink
Merge branch 'develop' into EA-3619_AddWorkflows
Browse files Browse the repository at this point in the history
  • Loading branch information
SrishtiSingh-eu authored Dec 4, 2023
2 parents 1db76ff + 50ddd75 commit 253a375
Show file tree
Hide file tree
Showing 27 changed files with 394 additions and 109 deletions.
67 changes: 32 additions & 35 deletions .github/workflows/build_test_analyse.yml
Original file line number Diff line number Diff line change
@@ -1,35 +1,32 @@
name: Build, Run Tests and Sonar Analysis
on: push

jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
# Shallow clones should be disabled for a better relevancy of analysis
fetch-depth: 0
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Cache Maven packages
uses: actions/cache@v1
with:
path: ~/.m2
key: ${{ runner.os }}-m2-${{ hashFiles('**/pom.xml') }}
restore-keys: ${{ runner.os }}-m2
- name: Cache SonarCloud packages
uses: actions/cache@v1
with:
path: ~/.sonar/cache
key: ${{ runner.os }}-sonar
restore-keys: ${{ runner.os }}-sonar
- name: Build, run tests and analyse
run: mvn -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_translation-api
env:
# Needed to get some information about the pull request, if any
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# SonarCloud access token should be generated from https://sonarcloud.io/account/security/
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
# CI pipeline: builds the project, runs the tests and sends the analysis to SonarCloud on every push.
name: Build, Run Tests and Sonar Analysis
on: push

jobs:
  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
        with:
          # Shallow clones should be disabled for a better relevancy of analysis
          fetch-depth: 0
      - name: Set up JDK 17
        uses: actions/setup-java@v3
        with:
          distribution: 'temurin' # should use the same as in the docker file
          java-version: 17
          # Built-in Maven dependency cache. No cache-dependency-path is set on purpose:
          # the previous value ('sub-project/pom.xml') was a documentation placeholder that
          # does not match any file visible in this repository; the default hashes the pom.xml files.
          cache: 'maven'
      - name: Cache SonarCloud packages
        uses: actions/cache@v3
        with:
          path: ~/.sonar/cache
          key: ${{ runner.os }}-sonar
          restore-keys: ${{ runner.os }}-sonar
      - name: Build, run tests and analyse
        run: mvn -B verify org.sonarsource.scanner.maven:sonar-maven-plugin:sonar -Pcoverage -Dsonar.projectKey=europeana_translation-api
        env:
          # Needed to get some information about the pull request, if any
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # SonarCloud access token should be generated from https://sonarcloud.io/account/security/
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
16 changes: 15 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
<module>translation-web</module>
<module>translation-tests</module>
<module>translation-client</module>

<module>translation-service-apache-tika</module>
</modules>

<repositories>
Expand All @@ -49,6 +49,7 @@
<java.version>17</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<maven.compiler.release>${java.version}</maven.compiler.release>
<api-commons.version>0.3.22-SNAPSHOT</api-commons.version>
<jettison.version>1.3</jettison.version>

Expand All @@ -73,6 +74,7 @@
<sonar.cpd.exclusions>**/model/**/*</sonar.cpd.exclusions>
<aggregate.report.xml>translation-tests/target/site/jacoco-aggregate/jacoco.xml</aggregate.report.xml>
<sonar.coverage.jacoco.xmlReportPaths>${aggregate.report.xml}</sonar.coverage.jacoco.xmlReportPaths>
<apache.tika.version>2.9.1</apache.tika.version>

</properties>

Expand Down Expand Up @@ -271,6 +273,18 @@
</lifecycleMappingMetadata>
</configuration>
</plugin>
<plugin>
  <!-- Compiler configuration, inherited by the modules of this build. -->
  <inherited>true</inherited>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-compiler-plugin</artifactId>
  <!--
  <version>3.10.1</version>
  -->
  <configuration>
    <!-- Uses the java.version property that also feeds maven.compiler.source/target/release.
         NOTE(review): the previous value referenced ${version.java}; confirm no property with
         that name is defined elsewhere before relying on this change. -->
    <source>${java.version}</source>
    <target>${java.version}</target>
  </configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,8 @@ public class InvalidLanguageException extends Exception{
/**
 * Creates the exception with a detail message only.
 *
 * @param message the detail message, passed unchanged to the superclass constructor
 */
public InvalidLanguageException(String message) {
super(message);
}

/**
 * Creates the exception with a detail message and the underlying cause, so the original
 * stack trace is preserved when another exception is translated into this one.
 *
 * @param message the detail message, passed unchanged to the superclass constructor
 * @param cause the exception that triggered this one
 */
public InvalidLanguageException(String message, Throwable cause) {
super(message, cause);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public static Language validateSingle(String languageAbbrevation) throws Invalid
try {
result = Language.valueOf(languageAbbrevation.trim().toUpperCase(Locale.ROOT));
} catch (IllegalArgumentException e) {
throw new InvalidLanguageException("Language value '" + languageAbbrevation + "' is not valid");
throw new InvalidLanguageException("Language value '" + languageAbbrevation + "' is not valid", e);
}
return result;
}
Expand All @@ -64,8 +64,8 @@ public static List<Language> validateMultiple(String languageAbbrevations) throw
throw new InvalidLanguageException("Empty language value");
}

List<Language> result = new ArrayList<>();
String[] languages = languageAbbrevations.split(SEPARATOR);
List<Language> result = new ArrayList<>(languages.length);
for (String language: languages) {
result.add(validateSingle(language));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,65 @@
import eu.europeana.api.translation.definitions.vocabulary.TranslationAppConstants;

/**
* Class to hold the Language pair values supported by the Translation services
* Mostly for the future Translation API, when we have more than one translation service
* Class to hold the Language pair values supported by the Translation services Mostly for the
* future Translation API, when we have more than one translation service
*
* @author Hugo
* @since 5 Apr 2023
*/
public class LanguagePair implements Comparable<LanguagePair> {

private String srcLang;
private String targetLang;
private String srcLang;
private String targetLang;

public LanguagePair(String srcLang, @NotNull String targetLang) {
this.srcLang = srcLang;
this.targetLang = targetLang;
public LanguagePair(String srcLang, @NotNull String targetLang) {
this.srcLang = srcLang;
this.targetLang = targetLang;
}

public String getSrcLang() {
return srcLang;
}

@Override
public boolean equals(Object obj) {
if (obj == null) {
return false;
}

public String getSrcLang() {
return srcLang;
if (!(this.getClass() == obj.getClass())) {
return false;
}

@Override
public boolean equals(Object obj) {
if (!(obj instanceof LanguagePair)) {
return false;
}
LanguagePair pair = (LanguagePair) obj;
return StringUtils.equals(targetLang, pair.targetLang)
&& StringUtils.equals(srcLang, pair.srcLang);
}

LanguagePair pair = (LanguagePair) obj;
return StringUtils.equals(targetLang, pair.targetLang)
&& StringUtils.equals(srcLang, pair.srcLang);
@Override
public int compareTo(LanguagePair pair) {
int ret = targetLang.compareTo(pair.targetLang);
if (ret == 0) {
ret = StringUtils.compare(srcLang, pair.srcLang);
}
return ret;
}

@Override
public int compareTo(LanguagePair pair) {
int ret = targetLang.compareTo(pair.targetLang);
if(ret == 0) {
ret = StringUtils.compare(srcLang, pair.srcLang);
}
return ret;
}

@Override
public String toString() {
return generateKey(srcLang, targetLang);
}
@Override
public String toString() {
return generateKey(srcLang, targetLang);
}

public static String generateKey(String srcLang, String targetLang) {
return srcLang + TranslationAppConstants.LANG_DELIMITER + targetLang;
}

@Override
public int hashCode() {
return srcLang==null ? targetLang.hashCode() : srcLang.hashCode() + targetLang.hashCode();
}
public static String generateKey(String srcLang, String targetLang) {
return srcLang + TranslationAppConstants.LANG_DELIMITER + targetLang;
}

public String getTargetLang() {
return targetLang;
}
@Override
public int hashCode() {
return srcLang == null ? targetLang.hashCode() : srcLang.hashCode() + targetLang.hashCode();
}

public String getTargetLang() {
return targetLang;
}
}
33 changes: 33 additions & 0 deletions translation-service-apache-tika/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>eu.europeana.api</groupId>
    <artifactId>translation-api</artifactId>
    <version>0.0.1-SNAPSHOT</version>
  </parent>
  <artifactId>translation-service-apache-tika</artifactId>
  <name>translation-service-apache-tika</name>
  <description>The Java APIs for the Apache Tika language detection services (part of Translation API)</description>

  <properties>
    <!-- Points Sonar at the aggregated JaCoCo report; the path is resolved relative to the parent directory. -->
    <sonar.coverage.jacoco.xmlReportPaths>${basedir}/../${aggregate.report.xml}</sonar.coverage.jacoco.xmlReportPaths>
  </properties>

  <dependencies>
    <dependency>
      <groupId>eu.europeana.api</groupId>
      <artifactId>translation-service-common</artifactId>
      <!-- Sibling module: follow this module's (inherited) version instead of a hard-coded
           value, so a version bump of the parent cannot leave the two out of sync. -->
      <version>${project.version}</version>
    </dependency>
    <!-- Tika versions come from the apache.tika.version property referenced below. -->
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-core</artifactId>
      <version>${apache.tika.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-langdetect-optimaize</artifactId>
      <version>${apache.tika.version}</version>
    </dependency>
  </dependencies>
</project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package eu.europeana.translation.service.apachetika;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.tika.langdetect.optimaize.OptimaizeLangDetector;
import org.apache.tika.language.detect.LanguageDetector;
import org.apache.tika.language.detect.LanguageResult;
import eu.europeana.api.translation.service.LanguageDetectionService;
import eu.europeana.api.translation.service.exception.LanguageDetectionException;

/**
 * {@link LanguageDetectionService} implementation that detects languages with the Apache Tika
 * {@link OptimaizeLangDetector}.
 */
public class ApacheTikaLangDetectService implements LanguageDetectionService {

  protected static final Logger LOG = LogManager.getLogger(ApacheTikaLangDetectService.class);

  /** Language codes for which {@link #isSupported(String)} answers {@code true}. */
  private static final Set<String> SUPPORTED_LANGUAGES = Set.of("af", "an", "ar", "ast", "be",
      "br", "ca", "bg", "bn", "cs", "cy", "da", "de", "el", "en", "es", "et", "eu", "fa", "fi",
      "fr", "ga", "gl", "gu", "he", "hi", "hr", "ht", "hu", "id", "is", "it", "ja", "km", "kn",
      "ko", "lt", "lv", "mk", "ml", "mr", "ms", "mt", "ne", "nl", "no", "oc", "pa", "pl", "pt",
      "ro", "ru", "sk", "sl", "so", "sq", "sr", "sv", "sw", "ta", "te", "th", "tl", "tr", "uk",
      "ur", "vi", "wa", "yi", "zh-cn", "zh-tw");

  // NOTE(review): one detector instance is shared by every detectLang call; confirm that it is
  // safe to use concurrently if this service is accessed from multiple threads.
  private final LanguageDetector detector;
  private String serviceId;

  /** Creates the service and loads the Optimaize language models. */
  public ApacheTikaLangDetectService() {
    this.detector = new OptimaizeLangDetector().loadModels();
  }

  /**
   * @param srcLang language code to check, may be {@code null}
   * @return {@code true} if the code is one of the supported languages; {@code false} otherwise,
   *         including for {@code null}
   */
  @Override
  public boolean isSupported(String srcLang) {
    // Set.of(...) rejects null lookups with a NullPointerException, hence the explicit guard.
    return srcLang != null && SUPPORTED_LANGUAGES.contains(srcLang);
  }

  /**
   * Detects the language of each text.
   *
   * @param texts texts to analyse; {@code null} or empty yields an empty list
   * @param langHint optional language expected by the caller; used only to break ties between
   *        results that share the highest score
   * @return one entry per input text, in the same order; an entry is {@code null} when the text
   *         is blank or no language could be determined
   * @throws LanguageDetectionException declared by the interface
   */
  @Override
  public List<String> detectLang(List<String> texts, String langHint) throws LanguageDetectionException {
    if (texts == null || texts.isEmpty()) {
      return Collections.emptyList();
    }

    List<String> detectedLangs = new ArrayList<>(texts.size());
    for (String text : texts) {
      if (StringUtils.isBlank(text)) {
        // Nothing to analyse; keep the positional alignment with the input list.
        detectedLangs.add(null);
        continue;
      }
      // per the original author's note, the results are sorted by score (best first)
      List<LanguageResult> tikaLanguages = this.detector.detectAll(text);
      detectedLangs.add(chooseDetectedLang(tikaLanguages, langHint));
    }
    return detectedLangs;
  }

  /**
   * Picks the detected language from the Tika results. If a language hint is given and it
   * appears among the results that share the highest score, the hint is returned; otherwise the
   * first (best) result is returned.
   *
   * @return the chosen language code, or {@code null} when there is no usable result
   */
  private String chooseDetectedLang(List<LanguageResult> tikaLanguages, String langHint) {
    if (tikaLanguages == null || tikaLanguages.isEmpty()) {
      return null;
    }

    LanguageResult best = tikaLanguages.get(0);
    String bestLang = best.getLanguage();
    if (StringUtils.isBlank(bestLang)) {
      // A result without a language code carries no information; report "not detected" the
      // same way as for an empty result list.
      return null;
    }
    // Without a hint (or when the hint already is the best match) the top result wins.
    if (StringUtils.isBlank(langHint) || langHint.equals(bestLang)) {
      return bestLang;
    }

    // Only results whose score is not lower than the top score can be preferred via the hint.
    float topScore = best.getRawScore();
    for (int i = 1; i < tikaLanguages.size(); i++) {
      LanguageResult candidate = tikaLanguages.get(i);
      if (candidate.getRawScore() < topScore) {
        break;
      }
      if (langHint.equals(candidate.getLanguage())) {
        return langHint;
      }
    }
    return bestLang;
  }

  /** No-op: this implementation performs no clean-up on close. */
  @Override
  public void close() {
  }

  @Override
  public String getServiceId() {
    return serviceId;
  }

  @Override
  public void setServiceId(String serviceId) {
    this.serviceId = serviceId;
  }

  /** @return always {@code null} in this implementation */
  @Override
  public String getExternalServiceEndPoint() {
    return null;
  }

}
Loading

0 comments on commit 253a375

Please sign in to comment.