diff --git a/.github/workflows/ci-build-manual.yml b/.github/workflows/ci-build-manual.yml new file mode 100644 index 00000000..8c009b0a --- /dev/null +++ b/.github/workflows/ci-build-manual.yml @@ -0,0 +1,40 @@ +name: Build and push a development version on docker + +on: + workflow_dispatch: + + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v1 + - name: Set up JDK 17 + uses: actions/setup-java@v1 + with: + java-version: 1.17 + - name: Build with Gradle + run: ./gradlew build -x test + + docker-build: + needs: [ build ] + runs-on: ubuntu-latest + + steps: + - name: Create more disk space + run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY" + - uses: actions/checkout@v2 + - name: Build and push + id: docker_build + uses: mr-smithers-excellent/docker-build-push@v5 + with: + dockerfile: Dockerfile.local + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + image: lfoppiano/grobid-quantities + registry: docker.io + pushImage: true + tags: latest-develop + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 26fa4e6f..f1b4526c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,9 +13,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed -+ Updated to Grobid version 0.8.0 ++ Updated to Grobid version 0.8.0 + Updated to Dropwizard version 4.x (from version 1.x) + + ## [0.7.3] – 2023-06-26 ### Added diff --git a/Dockerfile b/Dockerfile index d4feb329..07b294cf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -50,6 +50,7 @@ WORKDIR /opt FROM grobid/grobid:0.7.3 as runtime + # setting locale is likely useless but to be sure ENV LANG C.UTF-8 diff --git a/build.gradle b/build.gradle index 70ecd88f..0ea5d823 100644 --- a/build.gradle +++ b/build.gradle @@ -74,7 +74,7 @@ repositories { dependencies { //Tests - testImplementation 'org.junit.vintage:junit-vintage-engine:5.9.3' + testRuntimeOnly 'org.junit.vintage:junit-vintage-engine:5.9.3' testImplementation(platform('org.junit:junit-bom:5.9.3')) testImplementation('org.junit.jupiter:junit-jupiter') testImplementation 'org.easymock:easymock:5.1.0' @@ -84,9 +84,9 @@ dependencies { testImplementation 'org.jetbrains.kotlin:kotlin-test' //GROBID - implementation 'org.grobid:grobid-core:0.7.3' - implementation 'org.grobid:grobid-trainer:0.7.3' - implementation 'org.grobid:grobid-service:0.7.3' + implementation 'org.grobid:grobid-core:0.8.0' + implementation 'org.grobid:grobid-trainer:0.8.0' + implementation 'org.grobid:grobid-service:0.8.0' implementation "xerces:xercesImpl:2.12.0" implementation "net.arnx:jsonic:1.3.10" implementation "net.sf.saxon:Saxon-HE:9.6.0-9" @@ -104,10 +104,10 @@ dependencies { implementation 'commons-pool:commons-pool:1.6' //Json - implementation 'com.fasterxml.jackson.core:jackson-core:2.13.3' - implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.3' - implementation 'com.fasterxml.jackson.module:jackson-module-afterburner:2.13.3' - implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.13.3' + implementation 'com.fasterxml.jackson.core:jackson-core:2.14.3' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.14.3' + implementation 'com.fasterxml.jackson.module:jackson-module-afterburner:2.14.3' + implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.14.3' // measurements @@ -122,20 +122,21 @@ dependencies { implementation group: 'systems.uom', name: 'systems-unicode', version: '2.1' //Dropwizard - implementation "com.hubspot.dropwizard:dropwizard-guicier:1.3.5.2" - implementation "io.dropwizard:dropwizard-core:1.3.29" - implementation "io.dropwizard:dropwizard-assets:1.3.29" - implementation "io.dropwizard:dropwizard-testing:1.3.29" - implementation "io.dropwizard:dropwizard-forms:1.3.29" - implementation "io.dropwizard:dropwizard-client:1.3.29" - implementation "io.dropwizard:dropwizard-auth:1.3.29" - implementation "io.dropwizard.metrics:metrics-core:4.0.0" - implementation "io.dropwizard.metrics:metrics-servlets:4.0.0" - implementation 'javax.servlet:javax.servlet-api:3.1.0' + implementation 'ru.vyarus:dropwizard-guicey:7.0.0' + + implementation 'io.dropwizard:dropwizard-bom:4.0.0' + implementation 'io.dropwizard:dropwizard-core:4.0.0' + implementation 'io.dropwizard:dropwizard-assets:4.0.0' + implementation 'io.dropwizard:dropwizard-testing:4.0.0' + implementation 'io.dropwizard:dropwizard-forms:4.0.0' + implementation 'io.dropwizard:dropwizard-client:4.0.0' + implementation 'io.dropwizard:dropwizard-auth:4.0.0' + implementation 'io.dropwizard.metrics:metrics-core:4.2.22' + implementation 'io.dropwizard.metrics:metrics-servlets:4.2.22' //Misc implementation group: 'com.googlecode.clearnlp', name: 'clearnlp', version: '1.3.1' - implementation 'com.google.guava:guava:30.1.1-jre' + implementation 'com.google.guava:guava:31.0.1-jre' implementation "net.arnx:jsonic:1.3.10" // XML diff --git a/gradle.properties b/gradle.properties index 711024e9..65a82a47 100644 --- a/gradle.properties +++ b/gradle.properties @@ -1 +1 @@ -version=0.7.4-SNAPSHOT +version=0.8.0-SNAPSHOT diff --git a/localLibs/grobid-core-0.7.3.jar b/localLibs/grobid-core-0.8.0.jar similarity index 94% rename from localLibs/grobid-core-0.7.3.jar rename to localLibs/grobid-core-0.8.0.jar index bd2e5a38..b5edf72f 100644 Binary files a/localLibs/grobid-core-0.7.3.jar and b/localLibs/grobid-core-0.8.0.jar differ diff --git a/localLibs/grobid-service-0.7.3.jar b/localLibs/grobid-service-0.8.0.jar similarity index 94% rename from localLibs/grobid-service-0.7.3.jar rename to localLibs/grobid-service-0.8.0.jar index fa357016..f2f86e34 100644 Binary files a/localLibs/grobid-service-0.7.3.jar and b/localLibs/grobid-service-0.8.0.jar differ diff --git a/localLibs/grobid-trainer-0.7.3.jar b/localLibs/grobid-trainer-0.8.0.jar similarity index 97% rename from localLibs/grobid-trainer-0.7.3.jar rename to localLibs/grobid-trainer-0.8.0.jar index fbf20a4a..780c0f2a 100644 Binary files a/localLibs/grobid-trainer-0.7.3.jar and b/localLibs/grobid-trainer-0.8.0.jar differ diff --git a/resources/config/config.yml b/resources/config/config.yml index 5051a919..ca00cac7 100644 --- a/resources/config/config.yml +++ b/resources/config/config.yml @@ -56,9 +56,9 @@ models: embeddings_name: "glove-840B" -views: - .mustache: - cache: false +#views: +# .mustache: +# cache: false server: type: custom @@ -66,19 +66,21 @@ server: - type: http port: 8060 idleTimeout: 120 seconds + acceptQueueSize: 2048 + adminConnectors: - type: http port: 8061 registerDefaultExceptionMappers: false maxThreads: 2048 maxQueuedRequests: 2048 - acceptQueueSize: 2048 logging: level: INFO appenders: - type: console threshold: INFO + timeZone: UTC #Docker-ignore-log-start - type: file threshold: DEBUG @@ -89,5 +91,4 @@ logging: timeZone: UTC maxFileSize: 10MB #Docker-ignore-log-end -timeZone: UTC diff --git a/src/main/java/org/grobid/core/engines/QuantitiesEngine.java b/src/main/java/org/grobid/core/engines/QuantitiesEngine.java index ace2de05..32004a21 100644 --- a/src/main/java/org/grobid/core/engines/QuantitiesEngine.java +++ b/src/main/java/org/grobid/core/engines/QuantitiesEngine.java @@ -34,9 +34,9 @@ import org.xml.sax.InputSource; import org.xml.sax.SAXException; -import javax.inject.Inject; -import javax.inject.Singleton; -import javax.ws.rs.core.Response; +import jakarta.inject.Inject; +import jakarta.inject.Singleton; +import jakarta.ws.rs.core.Response; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; @@ -129,7 +129,7 @@ public MeasurementsResponse processPdf(InputStream inputStream) { BiblioItem resHeader = new BiblioItem(); //parsers.getHeaderParser().processingHeaderSection(false, doc, resHeader); - resHeader.generalResultMapping(labeledResult, tokenizationHeader); + resHeader.generalResultMappingHeader(labeledResult, tokenizationHeader); // title List titleTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_TITLE); diff --git a/src/main/java/org/grobid/core/engines/QuantityParser.java b/src/main/java/org/grobid/core/engines/QuantityParser.java index b305e335..a74240ff 100644 --- a/src/main/java/org/grobid/core/engines/QuantityParser.java +++ b/src/main/java/org/grobid/core/engines/QuantityParser.java @@ -1,10 +1,10 @@ package org.grobid.core.engines; import com.google.common.collect.Iterables; +import jakarta.inject.Inject; import org.apache.commons.collections4.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; -import org.apache.log4j.Layout; import org.grobid.core.GrobidModel; import org.grobid.core.analyzers.QuantityAnalyzer; import org.grobid.core.data.Measurement; @@ -18,8 +18,6 @@ import org.grobid.core.engines.label.TaggingLabel; import org.grobid.core.exceptions.GrobidException; import org.grobid.core.features.FeaturesVectorQuantities; -import org.grobid.core.lang.SentenceDetector; -import org.grobid.core.lang.impl.OpenNLPSentenceDetector; import org.grobid.core.layout.BoundingBox; import org.grobid.core.layout.LayoutToken; import org.grobid.core.lexicon.QuantityLexicon; @@ -29,7 +27,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.inject.Inject; import java.math.BigDecimal; import java.util.ArrayList; import java.util.Arrays; @@ -182,9 +179,10 @@ public List process(List layoutTokens) { } protected List getSentencesOffsets(List tokens) { - SentenceDetector segmenter = new OpenNLPSentenceDetector(); + SentenceUtilities segmenter = SentenceUtilities.getInstance(); + String text = LayoutTokensUtil.toText(tokens); - List results = segmenter.detect(text); + List results = segmenter.runSentenceDetection(text); if (CollectionUtils.isEmpty(results)) { results = Arrays.asList(new OffsetPosition(0, text.length())); diff --git a/src/main/java/org/grobid/core/utilities/TextParser.java b/src/main/java/org/grobid/core/utilities/TextParser.java index c5eb11e6..7e1a9594 100644 --- a/src/main/java/org/grobid/core/utilities/TextParser.java +++ b/src/main/java/org/grobid/core/utilities/TextParser.java @@ -57,7 +57,7 @@ public class TextParser { private AbstractPredIdentifier predicater = null; private AbstractSRLabeler labeler = null; private DEPReader depReader = null; - private SentenceDetector segmenter; + private SentenceUtilities segmenter; // this is for version 1.3.0 of ClearNLP private CRolesetClassifier roleClassifier = null; @@ -120,7 +120,7 @@ private void init(String dictionaryFile, String posModelFile, String depModelFil depReader = new DEPReader(0, 1, 2, 3, 4, 5, 6); - segmenter = new OpenNLPSentenceDetector(); + segmenter = SentenceUtilities.getInstance(); } /** @@ -177,7 +177,7 @@ public synchronized List parseText(String text) throws GrobidException } List results = new ArrayList<>(); - List sentences = this.segmenter.detect(text); + List sentences = this.segmenter.runSentenceDetection(text); if (CollectionUtils.isEmpty(sentences)) { // there is some text but not in a state so that a sentence at least can be @@ -219,7 +219,7 @@ public List parse(BufferedReader reader) throws GrobidException { String text = reader.lines().collect(Collectors.joining()); - List sentences = segmenter.detect(text); + List sentences = segmenter.runSentenceDetection(text); for (OffsetPosition sentencePosition : sentences) { String sentence = text.substring(sentencePosition.start, sentencePosition.end); diff --git a/src/main/java/org/grobid/service/GrobidEngineInitialiser.java b/src/main/java/org/grobid/service/GrobidEngineInitialiser.java index 22f4e06a..67114c77 100644 --- a/src/main/java/org/grobid/service/GrobidEngineInitialiser.java +++ b/src/main/java/org/grobid/service/GrobidEngineInitialiser.java @@ -1,6 +1,8 @@ package org.grobid.service; import com.google.common.collect.ImmutableList; +import jakarta.inject.Inject; +import jakarta.inject.Singleton; import org.grobid.core.main.GrobidHomeFinder; import org.grobid.core.main.LibraryLoader; import org.grobid.core.utilities.GrobidProperties; @@ -8,8 +10,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.inject.Inject; -import javax.inject.Singleton; @Singleton public class GrobidEngineInitialiser { diff --git a/src/main/java/org/grobid/service/QuantitiesServiceModule.java b/src/main/java/org/grobid/service/QuantitiesServiceModule.java index 8fe1b2b1..04476c8c 100644 --- a/src/main/java/org/grobid/service/QuantitiesServiceModule.java +++ b/src/main/java/org/grobid/service/QuantitiesServiceModule.java @@ -2,9 +2,11 @@ import com.codahale.metrics.MetricRegistry; import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.inject.AbstractModule; import com.google.inject.Binder; import com.google.inject.Provides; -import com.hubspot.dropwizard.guicier.DropwizardAwareModule; +import jakarta.ws.rs.client.Client; +import jakarta.ws.rs.client.ClientBuilder; import org.grobid.core.engines.QuantitiesEngine; import org.grobid.core.engines.QuantityParser; import org.grobid.service.configuration.GrobidQuantitiesConfiguration; @@ -14,34 +16,32 @@ import org.grobid.service.exceptions.mapper.GrobidExceptionsTranslationUtility; import org.grobid.service.exceptions.mapper.GrobidServiceExceptionMapper; import org.grobid.service.exceptions.mapper.WebApplicationExceptionMapper; - -import javax.ws.rs.client.Client; -import javax.ws.rs.client.ClientBuilder; +import ru.vyarus.dropwizard.guice.module.support.DropwizardAwareModule; public class QuantitiesServiceModule extends DropwizardAwareModule { @Override - public void configure(Binder binder) { + public void configure() { // -- Generic modules -- - binder.bind(GrobidEngineInitialiser.class); - binder.bind(HealthCheck.class); + bind(GrobidEngineInitialiser.class); + bind(HealthCheck.class); //Services - binder.bind(QuantityParser.class); - binder.bind(QuantitiesEngine.class); + bind(QuantityParser.class); + bind(QuantitiesEngine.class); //REST - binder.bind(AnnotationController.class); + bind(AnnotationController.class); //Exception Mappers - directly imported from Grobid - binder.bind(GrobidServiceExceptionMapper.class); - binder.bind(GrobidExceptionsTranslationUtility.class); - binder.bind(GrobidExceptionMapper.class); - binder.bind(WebApplicationExceptionMapper.class); + bind(GrobidServiceExceptionMapper.class); + bind(GrobidExceptionsTranslationUtility.class); + bind(GrobidExceptionMapper.class); + bind(WebApplicationExceptionMapper.class); } - @Provides + /*@Provides protected ObjectMapper getObjectMapper() { return getEnvironment().getObjectMapper(); } @@ -54,7 +54,7 @@ protected MetricRegistry provideMetricRegistry() { //for unit tests protected MetricRegistry getMetricRegistry() { return getEnvironment().metrics(); - } + }*/ @Provides Client provideClient() { diff --git a/src/main/java/org/grobid/service/command/PrepareDelftTrainingCommand.java b/src/main/java/org/grobid/service/command/PrepareDelftTrainingCommand.java index be4991a5..fd3ed989 100644 --- a/src/main/java/org/grobid/service/command/PrepareDelftTrainingCommand.java +++ b/src/main/java/org/grobid/service/command/PrepareDelftTrainingCommand.java @@ -1,7 +1,7 @@ package org.grobid.service.command; -import io.dropwizard.cli.ConfiguredCommand; -import io.dropwizard.setup.Bootstrap; +import io.dropwizard.core.cli.ConfiguredCommand; +import io.dropwizard.core.setup.Bootstrap; import net.sourceforge.argparse4j.impl.Arguments; import net.sourceforge.argparse4j.inf.MutuallyExclusiveGroup; import net.sourceforge.argparse4j.inf.Namespace; diff --git a/src/main/java/org/grobid/service/command/RunTrainingCommand.java b/src/main/java/org/grobid/service/command/RunTrainingCommand.java index 98906753..ebc08a63 100644 --- a/src/main/java/org/grobid/service/command/RunTrainingCommand.java +++ b/src/main/java/org/grobid/service/command/RunTrainingCommand.java @@ -1,7 +1,7 @@ package org.grobid.service.command; -import io.dropwizard.cli.ConfiguredCommand; -import io.dropwizard.setup.Bootstrap; +import io.dropwizard.core.cli.ConfiguredCommand; +import io.dropwizard.core.setup.Bootstrap; import net.sourceforge.argparse4j.impl.Arguments; import net.sourceforge.argparse4j.inf.Namespace; import net.sourceforge.argparse4j.inf.Subparser; diff --git a/src/main/java/org/grobid/service/command/TrainingGenerationCommand.java b/src/main/java/org/grobid/service/command/TrainingGenerationCommand.java index bb383825..9cccf2d3 100644 --- a/src/main/java/org/grobid/service/command/TrainingGenerationCommand.java +++ b/src/main/java/org/grobid/service/command/TrainingGenerationCommand.java @@ -1,7 +1,7 @@ package org.grobid.service.command; -import io.dropwizard.cli.ConfiguredCommand; -import io.dropwizard.setup.Bootstrap; +import io.dropwizard.core.cli.ConfiguredCommand; +import io.dropwizard.core.setup.Bootstrap; import net.sourceforge.argparse4j.impl.Arguments; import net.sourceforge.argparse4j.inf.Namespace; import net.sourceforge.argparse4j.inf.Subparser; @@ -15,7 +15,6 @@ import org.slf4j.LoggerFactory; import java.io.File; -import java.util.Arrays; import java.util.Collections; public class TrainingGenerationCommand extends ConfiguredCommand { diff --git a/src/main/java/org/grobid/service/command/UnitBatchProcessingCommand.java b/src/main/java/org/grobid/service/command/UnitBatchProcessingCommand.java index adc2a8ad..4862257a 100644 --- a/src/main/java/org/grobid/service/command/UnitBatchProcessingCommand.java +++ b/src/main/java/org/grobid/service/command/UnitBatchProcessingCommand.java @@ -1,21 +1,17 @@ package org.grobid.service.command; -import io.dropwizard.cli.ConfiguredCommand; -import io.dropwizard.setup.Bootstrap; +import io.dropwizard.core.cli.ConfiguredCommand; +import io.dropwizard.core.setup.Bootstrap; import net.sourceforge.argparse4j.impl.Arguments; import net.sourceforge.argparse4j.inf.Namespace; import net.sourceforge.argparse4j.inf.Subparser; -import org.grobid.core.engines.Engine; import org.grobid.core.engines.QuantitiesEngine; -import org.grobid.core.main.GrobidHomeFinder; -import org.grobid.core.main.LibraryLoader; import org.grobid.core.utilities.GrobidProperties; import org.grobid.service.configuration.GrobidQuantitiesConfiguration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; -import java.util.Arrays; import static org.grobid.service.command.TrainingGenerationCommand.*; diff --git a/src/main/java/org/grobid/service/configuration/GrobidQuantitiesConfiguration.java b/src/main/java/org/grobid/service/configuration/GrobidQuantitiesConfiguration.java index b5ffda51..0be9ab9d 100755 --- a/src/main/java/org/grobid/service/configuration/GrobidQuantitiesConfiguration.java +++ b/src/main/java/org/grobid/service/configuration/GrobidQuantitiesConfiguration.java @@ -1,7 +1,7 @@ package org.grobid.service.configuration; import com.fasterxml.jackson.annotation.JsonProperty; -import io.dropwizard.Configuration; +import io.dropwizard.core.Configuration; import org.apache.commons.io.IOUtils; import org.grobid.core.utilities.GrobidConfig; import org.grobid.core.utilities.GrobidProperties; diff --git a/src/main/java/org/grobid/service/controller/AnnotationController.java b/src/main/java/org/grobid/service/controller/AnnotationController.java index dfa432cb..ab607a00 100644 --- a/src/main/java/org/grobid/service/controller/AnnotationController.java +++ b/src/main/java/org/grobid/service/controller/AnnotationController.java @@ -1,6 +1,9 @@ package org.grobid.service.controller; import com.codahale.metrics.annotation.Timed; +import jakarta.ws.rs.*; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; import org.glassfish.jersey.media.multipart.FormDataContentDisposition; import org.glassfish.jersey.media.multipart.FormDataParam; import org.grobid.core.data.MeasurementsResponse; @@ -10,11 +13,8 @@ import org.grobid.core.engines.QuantityParser; import org.grobid.service.configuration.GrobidQuantitiesConfiguration; -import javax.inject.Inject; -import javax.inject.Singleton; -import javax.ws.rs.*; -import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.Response; +import jakarta.inject.Inject; +import jakarta.inject.Singleton; import java.io.InputStream; import java.util.List; diff --git a/src/main/java/org/grobid/service/controller/HealthCheck.java b/src/main/java/org/grobid/service/controller/HealthCheck.java index de73a3de..afbbdb0c 100644 --- a/src/main/java/org/grobid/service/controller/HealthCheck.java +++ b/src/main/java/org/grobid/service/controller/HealthCheck.java @@ -2,14 +2,14 @@ import org.grobid.service.configuration.GrobidQuantitiesConfiguration; -import javax.inject.Inject; -import javax.inject.Singleton; -import javax.ws.rs.GET; -import javax.ws.rs.Path; -import javax.ws.rs.Produces; -import javax.ws.rs.core.Response; +import jakarta.inject.Inject; +import jakarta.inject.Singleton; +import jakarta.ws.rs.GET; +import jakarta.ws.rs.Path; +import jakarta.ws.rs.Produces; +import jakarta.ws.rs.core.Response; -import static javax.ws.rs.core.MediaType.APPLICATION_JSON; +import static jakarta.ws.rs.core.MediaType.APPLICATION_JSON; @Path("health") @Singleton @@ -20,7 +20,8 @@ public class HealthCheck extends com.codahale.metrics.health.HealthCheck { private GrobidQuantitiesConfiguration configuration; @Inject - public HealthCheck() { + public HealthCheck(GrobidQuantitiesConfiguration configuration) { + this.configuration = configuration; } @GET diff --git a/src/main/java/org/grobid/service/exceptions/GrobidServiceException.java b/src/main/java/org/grobid/service/exceptions/GrobidServiceException.java new file mode 100755 index 00000000..689cf15c --- /dev/null +++ b/src/main/java/org/grobid/service/exceptions/GrobidServiceException.java @@ -0,0 +1,30 @@ +package org.grobid.service.exceptions; + +import jakarta.ws.rs.core.Response; +import org.grobid.core.exceptions.GrobidException; + + +public class GrobidServiceException extends GrobidException { + + private static final long serialVersionUID = -756089338090769910L; + private Response.Status responseCode; + + public GrobidServiceException(Response.Status responseCode) { + super(); + this.responseCode = responseCode; + } + + public GrobidServiceException(String msg, Response.Status responseCode) { + super(msg); + this.responseCode = responseCode; + } + + public GrobidServiceException(String msg, Throwable cause, Response.Status responseCode) { + super(msg, cause); + this.responseCode = responseCode; + } + + public Response.Status getResponseCode() { + return responseCode; + } +} diff --git a/src/main/java/org/grobid/service/exceptions/mapper/GrobidExceptionMapper.java b/src/main/java/org/grobid/service/exceptions/mapper/GrobidExceptionMapper.java index 0a0eff9e..ff541d28 100644 --- a/src/main/java/org/grobid/service/exceptions/mapper/GrobidExceptionMapper.java +++ b/src/main/java/org/grobid/service/exceptions/mapper/GrobidExceptionMapper.java @@ -1,16 +1,16 @@ package org.grobid.service.exceptions.mapper; import com.google.inject.Inject; +import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.HttpHeaders; +import jakarta.ws.rs.core.Response; +import jakarta.ws.rs.core.UriInfo; +import jakarta.ws.rs.ext.ExceptionMapper; +import jakarta.ws.rs.ext.Provider; import org.grobid.core.exceptions.GrobidException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import javax.ws.rs.core.Context; -import javax.ws.rs.core.HttpHeaders; -import javax.ws.rs.core.Response; -import javax.ws.rs.core.UriInfo; -import javax.ws.rs.ext.ExceptionMapper; -import javax.ws.rs.ext.Provider; @Provider public class GrobidExceptionMapper implements ExceptionMapper { diff --git a/src/main/java/org/grobid/service/exceptions/mapper/GrobidExceptionsTranslationUtility.java b/src/main/java/org/grobid/service/exceptions/mapper/GrobidExceptionsTranslationUtility.java new file mode 100644 index 00000000..8149611e --- /dev/null +++ b/src/main/java/org/grobid/service/exceptions/mapper/GrobidExceptionsTranslationUtility.java @@ -0,0 +1,114 @@ +package org.grobid.service.exceptions.mapper; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.google.common.base.Joiner; +import com.google.common.base.Throwables; +import jakarta.ws.rs.core.MediaType; +import jakarta.ws.rs.core.Response; +import jakarta.ws.rs.ext.Provider; +import org.grobid.core.exceptions.GrobidExceptionStatus; +import org.slf4j.MDC; + +import jakarta.inject.Inject; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +@Provider +public class GrobidExceptionsTranslationUtility { + + @Inject + public GrobidExceptionsTranslationUtility() { + } + + + public Response processException(Throwable exception, Response.Status status) { + try { + fillMdc(exception, status); + List descriptions = getExceptionDescriptions(exception, status); + +// String requestUri = uriInfo.getRequestUri().toString(); + return Response.status(status) + .type(MediaType.APPLICATION_JSON_TYPE) + .entity(buildJson(getExceptionName(exception), descriptions, status, null, null)) + .build(); + } finally { + cleanMdc(); + } + } + + public String getExceptionName(Throwable exception) { + String exceptionName = exception.getClass().getCanonicalName(); + if (exception.getCause() != null) { + exceptionName = exception.getCause().getClass().getCanonicalName(); + } + return exceptionName; + } + + + public void fillMdc(Throwable exception, Response.Status status) { + MDC.put("ExceptionName", getExceptionName(exception)); + MDC.put("StatusCode", String.valueOf(status.getStatusCode())); + MDC.put("ReasonPhrase", status.getReasonPhrase()); + MDC.put("StatusFamily", status.getFamily().toString()); + MDC.put("StackTrace", Throwables.getStackTraceAsString(exception)); + } + + public void cleanMdc() { + MDC.remove("ExceptionName"); + MDC.remove("StringErrorCode"); + MDC.remove("StatusCode"); + MDC.remove("ReasonPhrase"); + MDC.remove("StatusFamily"); + MDC.remove("StackTrace"); + } + + + public List getExceptionDescriptions(Throwable exception, Response.Status status) { + List descriptions = new ArrayList<>(); + + Throwable currentException = exception; + int maxIterations = 0; + while (currentException != null) { + StringBuilder sb = new StringBuilder(50); + sb.append(currentException.getClass().getName()); + if (currentException.getMessage() != null) { + sb.append(":").append(currentException.getMessage()); + } + descriptions.add(sb.toString()); + currentException = currentException.getCause(); + maxIterations++; + if (maxIterations > 4) { + break; + } + } + return descriptions; + } + + + public String buildJson(String type, List descriptions, Response.Status status, GrobidExceptionStatus grobidExceptionStatus, String requestUri) { + ObjectMapper mapper = new ObjectMapper(); + ObjectNode root = mapper.createObjectNode(); + root.put("type", type); + root.put("description", Joiner.on("\n").join(descriptions)); + root.put("code", status.getStatusCode()); + root.put("requestUri", requestUri); + String correlationId = MDC.get("correlationId"); + if (correlationId != null) { + root.put("correlationId", correlationId); + } + if (grobidExceptionStatus != null) { + root.put("grobidExceptionStatus", grobidExceptionStatus.name()); + } + + String json; + try { + json = mapper.writeValueAsString(root); + } catch (IOException e) { +// LOGGER.warn("Error in ServiceExceptionMapper: ", e); + json = "{\"description\": \"Internal error: " + e.getMessage() + "\"}"; + } + return json; + } +} diff --git a/src/main/java/org/grobid/service/exceptions/mapper/GrobidServiceExceptionMapper.java b/src/main/java/org/grobid/service/exceptions/mapper/GrobidServiceExceptionMapper.java new file mode 100644 index 00000000..54fbf143 --- /dev/null +++ b/src/main/java/org/grobid/service/exceptions/mapper/GrobidServiceExceptionMapper.java @@ -0,0 +1,36 @@ +package org.grobid.service.exceptions.mapper; + +import com.google.inject.Inject; +import jakarta.ws.rs.core.Context; +import jakarta.ws.rs.core.HttpHeaders; +import jakarta.ws.rs.core.Response; +import jakarta.ws.rs.core.UriInfo; +import jakarta.ws.rs.ext.ExceptionMapper; +import jakarta.ws.rs.ext.Provider; +import org.grobid.service.exceptions.GrobidServiceException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@Provider +public class GrobidServiceExceptionMapper implements ExceptionMapper { + private static final Logger LOGGER = LoggerFactory.getLogger(ExceptionMapper.class); + + @Context + protected HttpHeaders headers; + + @Context + private UriInfo uriInfo; + + @Inject + private GrobidExceptionsTranslationUtility mapper; + + @Inject + public GrobidServiceExceptionMapper() { + + } + + @Override + public Response toResponse(GrobidServiceException exception) { + return mapper.processException(exception, exception.getResponseCode()); + } +} diff --git a/src/main/java/org/grobid/service/exceptions/mapper/GrobidStatusToHttpStatusMapper.java b/src/main/java/org/grobid/service/exceptions/mapper/GrobidStatusToHttpStatusMapper.java new file mode 100644 index 00000000..3665003c --- /dev/null +++ b/src/main/java/org/grobid/service/exceptions/mapper/GrobidStatusToHttpStatusMapper.java @@ -0,0 +1,32 @@ +package org.grobid.service.exceptions.mapper; + +import jakarta.ws.rs.core.Response; +import org.grobid.core.exceptions.GrobidExceptionStatus; + + +public class GrobidStatusToHttpStatusMapper { + public static Response.Status getStatusCode(GrobidExceptionStatus status) { + switch (status) { + case BAD_INPUT_DATA: + return Response.Status.BAD_REQUEST; + case TAGGING_ERROR: + return Response.Status.INTERNAL_SERVER_ERROR; + case PARSING_ERROR: + return Response.Status.INTERNAL_SERVER_ERROR; + case TIMEOUT: + return Response.Status.CONFLICT; + case TOO_MANY_BLOCKS: + return Response.Status.CONFLICT; + case NO_BLOCKS: + return Response.Status.BAD_REQUEST; + case PDFALTO_CONVERSION_FAILURE: + return Response.Status.INTERNAL_SERVER_ERROR; + case TOO_MANY_TOKENS: + return Response.Status.CONFLICT; + case GENERAL: + return Response.Status.INTERNAL_SERVER_ERROR; + default: + return Response.Status.INTERNAL_SERVER_ERROR; + } + } +} diff --git a/src/main/java/org/grobid/service/exceptions/mapper/WebApplicationExceptionMapper.java b/src/main/java/org/grobid/service/exceptions/mapper/WebApplicationExceptionMapper.java new file mode 100644 index 00000000..67d7a69f --- /dev/null +++ b/src/main/java/org/grobid/service/exceptions/mapper/WebApplicationExceptionMapper.java @@ -0,0 +1,26 @@ +package org.grobid.service.exceptions.mapper; + +import com.google.inject.Inject; +import jakarta.ws.rs.WebApplicationException; +import jakarta.ws.rs.core.Response; +import jakarta.ws.rs.ext.ExceptionMapper; +import jakarta.ws.rs.ext.Provider; + + +@Provider +public class WebApplicationExceptionMapper implements ExceptionMapper { + + @Inject + public WebApplicationExceptionMapper() { + } + + @Override + public Response toResponse(WebApplicationException exception) { + Response.Status exceptionStatus = Response.Status.fromStatusCode(exception.getResponse().getStatus()); + if (exceptionStatus != null) { + return Response.status(exceptionStatus).build(); + } + + return Response.status(Response.Status.INTERNAL_SERVER_ERROR).build(); + } +} diff --git a/src/main/java/org/grobid/service/main/GrobidQuantitiesApplication.java b/src/main/java/org/grobid/service/main/GrobidQuantitiesApplication.java index 0f5ce412..5088d381 100755 --- a/src/main/java/org/grobid/service/main/GrobidQuantitiesApplication.java +++ b/src/main/java/org/grobid/service/main/GrobidQuantitiesApplication.java @@ -1,28 +1,28 @@ package org.grobid.service.main; -import com.google.common.collect.Lists; -import com.google.inject.Module; -import com.hubspot.dropwizard.guicier.GuiceBundle; -import io.dropwizard.Application; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.google.inject.AbstractModule; import io.dropwizard.assets.AssetsBundle; +import io.dropwizard.core.Application; +import io.dropwizard.core.setup.Bootstrap; +import io.dropwizard.core.setup.Environment; import io.dropwizard.forms.MultiPartBundle; -import io.dropwizard.setup.Bootstrap; -import io.dropwizard.setup.Environment; +import jakarta.servlet.DispatcherType; +import jakarta.servlet.FilterRegistration; import org.eclipse.jetty.servlets.CrossOriginFilter; import org.eclipse.jetty.servlets.QoSFilter; import org.grobid.service.QuantitiesServiceModule; import org.grobid.service.command.PrepareDelftTrainingCommand; import org.grobid.service.command.RunTrainingCommand; -import org.grobid.service.command.UnitBatchProcessingCommand; import org.grobid.service.command.TrainingGenerationCommand; +import org.grobid.service.command.UnitBatchProcessingCommand; import org.grobid.service.configuration.GrobidQuantitiesConfiguration; +import org.grobid.service.controller.HealthCheck; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import ru.vyarus.dropwizard.guice.GuiceBundle; -import javax.servlet.DispatcherType; -import javax.servlet.FilterRegistration; import java.util.EnumSet; -import java.util.List; public class GrobidQuantitiesApplication extends Application { private static final Logger LOGGER = LoggerFactory.getLogger(GrobidQuantitiesApplication.class); @@ -35,17 +35,19 @@ public String getName() { return "grobid-quantities"; } - private List getGuiceModules() { - return Lists.newArrayList(new QuantitiesServiceModule()); + private AbstractModule getGuiceModules() { + return new QuantitiesServiceModule(); } @Override public void initialize(Bootstrap bootstrap) { - GuiceBundle guiceBundle = GuiceBundle.defaultBuilder(GrobidQuantitiesConfiguration.class) - .modules(getGuiceModules()) - .build(); + GuiceBundle guiceBundle = GuiceBundle.builder() + .modules(getGuiceModules()) + .build(); + bootstrap.addBundle(guiceBundle); bootstrap.addBundle(new MultiPartBundle()); + bootstrap.getObjectMapper().enable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); bootstrap.addBundle(new AssetsBundle("/web", "/", "index.html", "assets")); bootstrap.addCommand(new TrainingGenerationCommand()); bootstrap.addCommand(new UnitBatchProcessingCommand()); @@ -55,6 +57,9 @@ public void initialize(Bootstrap bootstrap) { @Override public void run(GrobidQuantitiesConfiguration configuration, Environment environment) { + + environment.healthChecks().register("health-check", new HealthCheck(configuration)); + LOGGER.info("Service config={}", configuration); environment.jersey().setUrlPattern(RESOURCES + "/*"); @@ -78,7 +83,6 @@ public void run(GrobidQuantitiesConfiguration configuration, Environment environ final FilterRegistration.Dynamic qos = environment.servlets().addFilter("QOS", QoSFilter.class); qos.addMappingForUrlPatterns(EnumSet.allOf(DispatcherType.class), true, "/*"); qos.setInitParameter("maxRequests", String.valueOf(configuration.getMaxParallelRequests())); - } public static void main(String[] args) throws Exception { diff --git a/src/main/resources/web/index.html b/src/main/resources/web/index.html index ffa37783..9551aca9 100644 --- a/src/main/resources/web/index.html +++ b/src/main/resources/web/index.html @@ -157,7 +157,7 @@