Skip to content

Commit

Permalink
doc; prepare coord in affiliations
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Nov 18, 2023
1 parent 6f4bcdb commit 169dcfd
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 4 deletions.
4 changes: 3 additions & 1 deletion doc/Grobid-docker.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ By default, this image runs Deep Learning models for:

- segmentation of the bibliographical reference section into individual references,

- parsing of the header metadata.
- parsing of the header metadata,

- parsing of funding and acknowledgement sections.

With a GPU (at least 4GB GPU memory required), the processing runtime is similar to that of the CRF-only image running on CPU only.

Expand Down
17 changes: 14 additions & 3 deletions grobid-core/src/main/java/org/grobid/core/data/Affiliation.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import org.grobid.core.lexicon.Lexicon;
import org.grobid.core.layout.LayoutToken;
import org.grobid.core.utilities.OffsetPosition;
import org.grobid.core.engines.label.TaggingLabel;

import java.util.ArrayList;
import java.util.List;
import java.util.*;

/**
* Class for representing and exchanging affiliation information.
Expand Down Expand Up @@ -37,6 +37,9 @@ public class Affiliation {

private List<LayoutToken> layoutTokens = null;

// map of model labels to their lists of LayoutToken
private Map<String, List<LayoutToken>> labeledTokens;

// an identifier for the affiliation independent from the marker, present in the TEI result
private String key = null;

Expand Down Expand Up @@ -672,5 +675,13 @@ public String toString() {
", failAffiliation=" + failAffiliation +
'}';
}


/**
 * Registers the layout tokens associated with a model label for this affiliation.
 * The internal label map is created on first use. The tokens are stored under
 * the string returned by {@code label.getLabel()}; an existing entry for the
 * same label string is replaced.
 *
 * @param label         the tagging label whose string form is used as map key
 * @param tokenizations the tokens to store for this label; when {@code null},
 *                      a new empty list is stored instead. A non-null list is
 *                      stored as-is (no copy is made).
 */
public void addLabeledResult(TaggingLabel label, List<LayoutToken> tokenizations) {
    // lazily create the label map the first time a labeled result is added
    if (labeledTokens == null) {
        labeledTokens = new TreeMap<>();
    }

    List<LayoutToken> tokensForLabel;
    if (tokenizations != null) {
        tokensForLabel = tokenizations;
    } else {
        // never store a null value: fall back to an empty list
        tokensForLabel = new ArrayList<>();
    }

    labeledTokens.put(label.getLabel(), tokensForLabel);
}

}
11 changes: 11 additions & 0 deletions grobid-core/src/main/java/org/grobid/core/data/BiblioItem.java
Original file line number Diff line number Diff line change
Expand Up @@ -3779,10 +3779,21 @@ private void appendAffiliation(
GrobidAnalysisConfig config,
Lexicon lexicon
) {
boolean affiliationWithCoords = (config.getGenerateTeiCoordinates() != null) && (config.getGenerateTeiCoordinates().contains("affiliation"));
boolean orgnameWithCoords = (config.getGenerateTeiCoordinates() != null) && (config.getGenerateTeiCoordinates().contains("orgName"));

TextUtilities.appendN(tei, '\t', nbTag);
tei.append("<affiliation");
if (aff.getKey() != null)
tei.append(" key=\"").append(aff.getKey()).append("\"");
if (affiliationWithCoords) {
// we serialize the coordinates for the whole affiliation block
List<LayoutToken> affTokens = aff.getLayoutTokens();
String coords = LayoutTokensUtil.getCoordsString(affTokens);
if (coords != null && coords.length()>0) {
tei.append(" coord=\"" + coords + "\"");
}
}
tei.append(">\n");

if (
Expand Down

0 comments on commit 169dcfd

Please sign in to comment.