Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update to Dropwizard 4 #136

Merged
merged 29 commits into from
Dec 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
41017ce
Update to dropwizard 2
lfoppiano May 18, 2022
d460a3a
update grobid 0.7.1 image
lfoppiano May 18, 2022
fdcb3b2
Merge branch 'master' into feature/dropwizard2
lfoppiano May 23, 2022
05ce962
fix configuration with dropwizard 2
lfoppiano May 23, 2022
b8900a6
update dropwizard to the latest version
lfoppiano May 23, 2022
e95b763
Merge branch 'master' into feature/dropwizard2
lfoppiano May 24, 2022
79db60a
Merge branch 'master' into feature/dropwizard2
lfoppiano Jun 16, 2023
5c65b01
update dependencies, fixed code compilation to dropwizard 4
lfoppiano Jun 16, 2023
bc3dff9
Update migration
lfoppiano Jun 19, 2023
cd16548
Fix servlet-api
lfoppiano Jun 19, 2023
edf8bff
Fix sentence detection initialisation
lfoppiano Jun 19, 2023
3f91583
Update to grobid 0.8.0
lfoppiano Dec 14, 2023
fa9a3df
prepare new version
lfoppiano Dec 14, 2023
463c2b9
Merge branch 'master' into feature/dropwizard2
lfoppiano Dec 14, 2023
94b406d
catch up with the changelog
lfoppiano Dec 15, 2023
5266213
propagate change from the grobid header parser
lfoppiano Dec 15, 2023
d62dd3d
update copyright
lfoppiano Dec 15, 2023
5b8719d
add citation
lfoppiano Dec 15, 2023
40b493f
update citation
lfoppiano Dec 15, 2023
f896783
cleanup
lfoppiano Dec 15, 2023
83b2416
Update CITATION.cff
lfoppiano Dec 15, 2023
31ad89c
Update CITATION.cff
lfoppiano Dec 15, 2023
b76d74c
Update CITATION.cff
lfoppiano Dec 15, 2023
f225291
cleanup
lfoppiano Dec 15, 2023
4a7344e
fix copyright notice
lfoppiano Dec 15, 2023
c330f1c
update libraries
lfoppiano Dec 15, 2023
bd30ba2
update libraries
lfoppiano Dec 15, 2023
8105411
Merge branch 'master' into feature/dropwizard2
lfoppiano Dec 15, 2023
51ca156
add manual trigger for docker
lfoppiano Dec 18, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/ci-build-manual.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
name: Build and push a development version on docker

on:
workflow_dispatch:


jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v1
- name: Set up JDK 17
uses: actions/setup-java@v1
with:
java-version: 1.17
- name: Build with Gradle
run: ./gradlew build -x test

docker-build:
needs: [ build ]
runs-on: ubuntu-latest

steps:
- name: Create more disk space
run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
- uses: actions/checkout@v2
- name: Build and push
id: docker_build
uses: mr-smithers-excellent/docker-build-push@v5
with:
dockerfile: Dockerfile.local
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
image: lfoppiano/grobid-quantities
registry: docker.io
pushImage: true
tags: latest-develop
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

### Changed

+ Updated to Grobid version 0.8.0
+ Updated to Grobid version 0.8.0
+ Updated to Dropwizard version 4.x (from version 1.x)



## [0.7.3] – 2023-06-26

### Added
Expand Down
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ WORKDIR /opt

FROM grobid/grobid:0.7.3 as runtime


# setting the locale is probably unnecessary, but we set it explicitly to be safe
ENV LANG C.UTF-8

Expand Down
39 changes: 20 additions & 19 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ repositories {

dependencies {
//Tests
testImplementation 'org.junit.vintage:junit-vintage-engine:5.9.3'
testRuntimeOnly 'org.junit.vintage:junit-vintage-engine:5.9.3'
testImplementation(platform('org.junit:junit-bom:5.9.3'))
testImplementation('org.junit.jupiter:junit-jupiter')
testImplementation 'org.easymock:easymock:5.1.0'
Expand All @@ -84,9 +84,9 @@ dependencies {
testImplementation 'org.jetbrains.kotlin:kotlin-test'

//GROBID
implementation 'org.grobid:grobid-core:0.7.3'
implementation 'org.grobid:grobid-trainer:0.7.3'
implementation 'org.grobid:grobid-service:0.7.3'
implementation 'org.grobid:grobid-core:0.8.0'
implementation 'org.grobid:grobid-trainer:0.8.0'
implementation 'org.grobid:grobid-service:0.8.0'
implementation "xerces:xercesImpl:2.12.0"
implementation "net.arnx:jsonic:1.3.10"
implementation "net.sf.saxon:Saxon-HE:9.6.0-9"
Expand All @@ -104,10 +104,10 @@ dependencies {
implementation 'commons-pool:commons-pool:1.6'

//Json
implementation 'com.fasterxml.jackson.core:jackson-core:2.13.3'
implementation 'com.fasterxml.jackson.core:jackson-databind:2.13.3'
implementation 'com.fasterxml.jackson.module:jackson-module-afterburner:2.13.3'
implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.13.3'
implementation 'com.fasterxml.jackson.core:jackson-core:2.14.3'
implementation 'com.fasterxml.jackson.core:jackson-databind:2.14.3'
implementation 'com.fasterxml.jackson.module:jackson-module-afterburner:2.14.3'
implementation 'com.fasterxml.jackson.dataformat:jackson-dataformat-yaml:2.14.3'

// measurements

Expand All @@ -122,20 +122,21 @@ dependencies {
implementation group: 'systems.uom', name: 'systems-unicode', version: '2.1'

//Dropwizard
implementation "com.hubspot.dropwizard:dropwizard-guicier:1.3.5.2"
implementation "io.dropwizard:dropwizard-core:1.3.29"
implementation "io.dropwizard:dropwizard-assets:1.3.29"
implementation "io.dropwizard:dropwizard-testing:1.3.29"
implementation "io.dropwizard:dropwizard-forms:1.3.29"
implementation "io.dropwizard:dropwizard-client:1.3.29"
implementation "io.dropwizard:dropwizard-auth:1.3.29"
implementation "io.dropwizard.metrics:metrics-core:4.0.0"
implementation "io.dropwizard.metrics:metrics-servlets:4.0.0"
implementation 'javax.servlet:javax.servlet-api:3.1.0'
implementation 'ru.vyarus:dropwizard-guicey:7.0.0'

implementation 'io.dropwizard:dropwizard-bom:4.0.0'
implementation 'io.dropwizard:dropwizard-core:4.0.0'
implementation 'io.dropwizard:dropwizard-assets:4.0.0'
implementation 'io.dropwizard:dropwizard-testing:4.0.0'
implementation 'io.dropwizard:dropwizard-forms:4.0.0'
implementation 'io.dropwizard:dropwizard-client:4.0.0'
implementation 'io.dropwizard:dropwizard-auth:4.0.0'
implementation 'io.dropwizard.metrics:metrics-core:4.2.22'
implementation 'io.dropwizard.metrics:metrics-servlets:4.2.22'

//Misc
implementation group: 'com.googlecode.clearnlp', name: 'clearnlp', version: '1.3.1'
implementation 'com.google.guava:guava:30.1.1-jre'
implementation 'com.google.guava:guava:31.0.1-jre'
implementation "net.arnx:jsonic:1.3.10"

// XML
Expand Down
2 changes: 1 addition & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
version=0.7.4-SNAPSHOT
version=0.8.0-SNAPSHOT
Binary file not shown.
Binary file not shown.
Binary file not shown.
11 changes: 6 additions & 5 deletions resources/config/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,29 +56,31 @@ models:
embeddings_name: "glove-840B"


views:
.mustache:
cache: false
#views:
# .mustache:
# cache: false

server:
type: custom
applicationConnectors:
- type: http
port: 8060
idleTimeout: 120 seconds
acceptQueueSize: 2048

adminConnectors:
- type: http
port: 8061
registerDefaultExceptionMappers: false
maxThreads: 2048
maxQueuedRequests: 2048
acceptQueueSize: 2048

logging:
level: INFO
appenders:
- type: console
threshold: INFO
timeZone: UTC
#Docker-ignore-log-start
- type: file
threshold: DEBUG
Expand All @@ -89,5 +91,4 @@ logging:
timeZone: UTC
maxFileSize: 10MB
#Docker-ignore-log-end
timeZone: UTC

8 changes: 4 additions & 4 deletions src/main/java/org/grobid/core/engines/QuantitiesEngine.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import javax.inject.Inject;
import javax.inject.Singleton;
import javax.ws.rs.core.Response;
import jakarta.inject.Inject;
import jakarta.inject.Singleton;
import jakarta.ws.rs.core.Response;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
Expand Down Expand Up @@ -129,7 +129,7 @@ public MeasurementsResponse processPdf(InputStream inputStream) {

BiblioItem resHeader = new BiblioItem();
//parsers.getHeaderParser().processingHeaderSection(false, doc, resHeader);
resHeader.generalResultMapping(labeledResult, tokenizationHeader);
resHeader.generalResultMappingHeader(labeledResult, tokenizationHeader);

// title
List<LayoutToken> titleTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_TITLE);
Expand Down
10 changes: 4 additions & 6 deletions src/main/java/org/grobid/core/engines/QuantityParser.java
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package org.grobid.core.engines;

import com.google.common.collect.Iterables;
import jakarta.inject.Inject;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.log4j.Layout;
import org.grobid.core.GrobidModel;
import org.grobid.core.analyzers.QuantityAnalyzer;
import org.grobid.core.data.Measurement;
Expand All @@ -18,8 +18,6 @@
import org.grobid.core.engines.label.TaggingLabel;
import org.grobid.core.exceptions.GrobidException;
import org.grobid.core.features.FeaturesVectorQuantities;
import org.grobid.core.lang.SentenceDetector;
import org.grobid.core.lang.impl.OpenNLPSentenceDetector;
import org.grobid.core.layout.BoundingBox;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.lexicon.QuantityLexicon;
Expand All @@ -29,7 +27,6 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Arrays;
Expand Down Expand Up @@ -182,9 +179,10 @@ public List<Measurement> process(List<LayoutToken> layoutTokens) {
}

protected List<OffsetPosition> getSentencesOffsets(List<LayoutToken> tokens) {
SentenceDetector segmenter = new OpenNLPSentenceDetector();
SentenceUtilities segmenter = SentenceUtilities.getInstance();

String text = LayoutTokensUtil.toText(tokens);
List<OffsetPosition> results = segmenter.detect(text);
List<OffsetPosition> results = segmenter.runSentenceDetection(text);

if (CollectionUtils.isEmpty(results)) {
results = Arrays.asList(new OffsetPosition(0, text.length()));
Expand Down
8 changes: 4 additions & 4 deletions src/main/java/org/grobid/core/utilities/TextParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public class TextParser {
private AbstractPredIdentifier predicater = null;
private AbstractSRLabeler labeler = null;
private DEPReader depReader = null;
private SentenceDetector segmenter;
private SentenceUtilities segmenter;

// this is for version 1.3.0 of ClearNLP
private CRolesetClassifier roleClassifier = null;
Expand Down Expand Up @@ -120,7 +120,7 @@ private void init(String dictionaryFile, String posModelFile, String depModelFil

depReader = new DEPReader(0, 1, 2, 3, 4, 5, 6);

segmenter = new OpenNLPSentenceDetector();
segmenter = SentenceUtilities.getInstance();
}

/**
Expand Down Expand Up @@ -177,7 +177,7 @@ public synchronized List<Sentence> parseText(String text) throws GrobidException
}

List<Sentence> results = new ArrayList<>();
List<OffsetPosition> sentences = this.segmenter.detect(text);
List<OffsetPosition> sentences = this.segmenter.runSentenceDetection(text);

if (CollectionUtils.isEmpty(sentences)) {
// there is some text but not in a state so that a sentence at least can be
Expand Down Expand Up @@ -219,7 +219,7 @@ public List<Sentence> parse(BufferedReader reader) throws GrobidException {

String text = reader.lines().collect(Collectors.joining());

List<OffsetPosition> sentences = segmenter.detect(text);
List<OffsetPosition> sentences = segmenter.runSentenceDetection(text);

for (OffsetPosition sentencePosition : sentences) {
String sentence = text.substring(sentencePosition.start, sentencePosition.end);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/grobid/service/GrobidEngineInitialiser.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
package org.grobid.service;

import com.google.common.collect.ImmutableList;
import jakarta.inject.Inject;
import jakarta.inject.Singleton;
import org.grobid.core.main.GrobidHomeFinder;
import org.grobid.core.main.LibraryLoader;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.service.configuration.GrobidQuantitiesConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import javax.inject.Singleton;

@Singleton
public class GrobidEngineInitialiser {
Expand Down
32 changes: 16 additions & 16 deletions src/main/java/org/grobid/service/QuantitiesServiceModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@

import com.codahale.metrics.MetricRegistry;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.inject.AbstractModule;
import com.google.inject.Binder;
import com.google.inject.Provides;
import com.hubspot.dropwizard.guicier.DropwizardAwareModule;
import jakarta.ws.rs.client.Client;
import jakarta.ws.rs.client.ClientBuilder;
import org.grobid.core.engines.QuantitiesEngine;
import org.grobid.core.engines.QuantityParser;
import org.grobid.service.configuration.GrobidQuantitiesConfiguration;
Expand All @@ -14,34 +16,32 @@
import org.grobid.service.exceptions.mapper.GrobidExceptionsTranslationUtility;
import org.grobid.service.exceptions.mapper.GrobidServiceExceptionMapper;
import org.grobid.service.exceptions.mapper.WebApplicationExceptionMapper;

import javax.ws.rs.client.Client;
import javax.ws.rs.client.ClientBuilder;
import ru.vyarus.dropwizard.guice.module.support.DropwizardAwareModule;


public class QuantitiesServiceModule extends DropwizardAwareModule<GrobidQuantitiesConfiguration> {

@Override
public void configure(Binder binder) {
public void configure() {
// -- Generic modules --
binder.bind(GrobidEngineInitialiser.class);
binder.bind(HealthCheck.class);
bind(GrobidEngineInitialiser.class);
bind(HealthCheck.class);

//Services
binder.bind(QuantityParser.class);
binder.bind(QuantitiesEngine.class);
bind(QuantityParser.class);
bind(QuantitiesEngine.class);

//REST
binder.bind(AnnotationController.class);
bind(AnnotationController.class);

//Exception Mappers - directly imported from Grobid
binder.bind(GrobidServiceExceptionMapper.class);
binder.bind(GrobidExceptionsTranslationUtility.class);
binder.bind(GrobidExceptionMapper.class);
binder.bind(WebApplicationExceptionMapper.class);
bind(GrobidServiceExceptionMapper.class);
bind(GrobidExceptionsTranslationUtility.class);
bind(GrobidExceptionMapper.class);
bind(WebApplicationExceptionMapper.class);
}

@Provides
/*@Provides
protected ObjectMapper getObjectMapper() {
return getEnvironment().getObjectMapper();
}
Expand All @@ -54,7 +54,7 @@ protected MetricRegistry provideMetricRegistry() {
//for unit tests
protected MetricRegistry getMetricRegistry() {
return getEnvironment().metrics();
}
}*/

@Provides
Client provideClient() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.grobid.service.command;

import io.dropwizard.cli.ConfiguredCommand;
import io.dropwizard.setup.Bootstrap;
import io.dropwizard.core.cli.ConfiguredCommand;
import io.dropwizard.core.setup.Bootstrap;
import net.sourceforge.argparse4j.impl.Arguments;
import net.sourceforge.argparse4j.inf.MutuallyExclusiveGroup;
import net.sourceforge.argparse4j.inf.Namespace;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package org.grobid.service.command;

import io.dropwizard.cli.ConfiguredCommand;
import io.dropwizard.setup.Bootstrap;
import io.dropwizard.core.cli.ConfiguredCommand;
import io.dropwizard.core.setup.Bootstrap;
import net.sourceforge.argparse4j.impl.Arguments;
import net.sourceforge.argparse4j.inf.Namespace;
import net.sourceforge.argparse4j.inf.Subparser;
Expand Down
Loading
Loading