Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1443 - Make BratReader more forgiving #1444

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
import com.fasterxml.jackson.annotation.Nulls;
import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;

import eu.openminted.share.annotations.api.DocumentationResource;

Expand Down Expand Up @@ -166,6 +167,7 @@ public class BratReader
private String mappingJson;

private Mapping mapping;
private static Mapping defaultMapping = null;

private Map<String, AnnotationFS> idMap;

Expand All @@ -177,16 +179,14 @@ public void initialize(UimaContext aContext)
{
super.initialize(aContext);

try {
mapping = getDefaultMapping();
} catch (IOException e) {
throw new ResourceInitializationException(e);
}

if (mappingJson != null) {
ObjectMapper mapper = new ObjectMapper();
mapper.setDefaultSetterInfo(JsonSetter.Value.forContentNulls(Nulls.AS_EMPTY));
mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
try {
mapping = mapper.readValue(mappingJson, Mapping.class);
}
catch (IOException e) {
throw new ResourceInitializationException(e);
}
mapping = Mapping.merge(json2Mapping(mappingJson), mapping);
}
else {
Map<String, RelationMapping> parsedRelationTypes = new HashMap<>();
Expand All @@ -213,7 +213,31 @@ public void initialize(UimaContext aContext)
warnings = new LinkedHashSet<String>();
}

/**
 * Creates a new Jackson {@link ObjectMapper} configured the way this reader needs it.
 * <p>
 * The returned mapper applies {@code Nulls.AS_EMPTY} to content nulls, accepts
 * single-quoted JSON strings, and does not fail when serializing empty beans.
 * A fresh instance is created on every call.
 * <p>
 * This is a private helper and overrides nothing, so it must not carry an
 * {@code @Override} annotation.
 *
 * @return a newly created, configured object mapper
 */
private ObjectMapper mapper() {
    ObjectMapper mapper = new ObjectMapper();
    // Content nulls are deserialized using the "empty" value instead of null.
    mapper.setDefaultSetterInfo(JsonSetter.Value.forContentNulls(Nulls.AS_EMPTY));
    // Mapping snippets in this class are written with single quotes.
    mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
    mapper.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false);

    return mapper;
}

/**
 * Deserializes the given JSON text into a {@link Mapping}.
 *
 * @param mappingJson2
 *            the JSON text to parse
 * @return the mapping read from {@code mappingJson2}
 * @throws ResourceInitializationException
 *             if the JSON cannot be read; the underlying {@link IOException} is
 *             attached as the cause
 */
private Mapping json2Mapping(String mappingJson2) throws ResourceInitializationException {
    ObjectMapper mapper = new ObjectMapper();
    mapper.setDefaultSetterInfo(JsonSetter.Value.forContentNulls(Nulls.AS_EMPTY));
    mapper.configure(JsonParser.Feature.ALLOW_SINGLE_QUOTES, true);
    mapper.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false);

    try {
        // Parse the argument that was passed in, not the mappingJson field, so the
        // method works for any JSON text a caller hands over.
        return mapper.readValue(mappingJson2, Mapping.class);
    }
    catch (IOException e) {
        throw new ResourceInitializationException(e);
    }
}

@Override
public void close()
throws IOException
{
Expand Down Expand Up @@ -566,4 +590,57 @@ private Feature getFeature(FeatureStructure aFS, String aName)
}
return f;
}


//////////////////////////////////////////
// Start of Improvements to BratReader
// -- Alain Désilets
//////////////////////////////////////////

/**
 * Returns the built-in default mapping, building it on the first call and keeping it
 * in the static {@code defaultMapping} field for later calls.
 * <p>
 * The default consists of a fixed list of text type mappings, one relation type
 * mapping for {@code Origin}, one span mapping parsed from an inline JSON snippet,
 * and empty relation and comment mapping lists.
 *
 * @return the cached default mapping
 * @throws ResourceInitializationException
 *             declared by the signature; no statement in this method throws it directly
 * @throws IOException
 *             if the inline span mapping JSON cannot be parsed
 */
private Mapping getDefaultMapping() throws ResourceInitializationException, IOException {
    if (defaultMapping != null) {
        return defaultMapping;
    }

    // AD: Loading the defaults from the resource file
    // "org/dkpro/core/io/brat/defaultMapping.json" via json2Mapping(...) could not be
    // made to work; serializing/deserializing Mapping objects seems to be very
    // brittle. The defaults are therefore assembled programmatically below.

    // Each row is { brat type pattern, UIMA type name }, in registration order.
    String[][] textTypePairs = {
        { "CARDINAL", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Cardinal" },
        { "Country", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Location" },
        { "DATE", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Date" },
        { "ORDINAL", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Ordinal" },
        { "LOC", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Location" },
        { "MERGE-ORG", "de.tudarmstadt.ukp.dkpro.core.io.brat.type.MergeOrg" },
        { "ORG", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Organization" },
        { "Organization", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Organization" },
        { "PER", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Person" },
        { "TIME", "de.tudarmstadt.ukp.dkpro.core.api.ner.type.Time" }
    };
    List<TypeMapping> textTypeList = new ArrayList<TypeMapping>();
    for (String[] pair : textTypePairs) {
        textTypeList.add(new TypeMapping(pair[0], pair[1]));
    }
    TypeMappings textTypes = new TypeMappings(textTypeList);

    List<TypeMapping> relationTypeList = new ArrayList<TypeMapping>();
    relationTypeList.add(new TypeMapping("Origin",
            "de.tudarmstadt.ukp.dkpro.core.io.brat.type.AnnotationRelation"));
    TypeMappings relationTypes = new TypeMappings(relationTypeList);

    // The single default span mapping is parsed from JSON with the shared mapper settings.
    List<SpanMapping> spanList = new ArrayList<SpanMapping>();
    spanList.add(mapper().readValue(
            "{'type':'de.tudarmstadt.ukp.dkpro.core.api.ner.type.Location', 'defaultFeatureValues': {'value': 'LOC'}}",
            SpanMapping.class));

    List<RelationMapping> noRelations = new ArrayList<RelationMapping>();
    List<CommentMapping> noComments = new ArrayList<CommentMapping>();

    // Assign the cache only after everything was built successfully.
    defaultMapping = new Mapping(textTypes, relationTypes, spanList, noRelations, noComments);
    return defaultMapping;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.toMap;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -86,4 +87,56 @@ public Collection<CommentMapping> getCommentMapping(String aType)
{
return comments.get(aType);
}

/**
 * Builds a new {@link Mapping} that combines two mappings.
 * <p>
 * For text type mappings, relation type mappings, relation mappings and comment
 * mappings, the result holds all entries of {@code mapping1} followed by those
 * entries of {@code mapping2} that the merged list does not already
 * {@link List#contains(Object) contain}. Span (text annotation) mappings are taken
 * from {@code mapping1} only.
 *
 * @param mapping1
 *            the mapping whose entries come first
 * @param mapping2
 *            the mapping whose not-yet-present entries are appended
 * @return a newly constructed merged mapping
 */
public static Mapping merge(Mapping mapping1, Mapping mapping2) {
    List<TypeMapping> textTypes =
            new ArrayList<TypeMapping>(mapping1.getTextTypeMapppings().parsedMappings);
    appendMissing(textTypes, mapping2.getTextTypeMapppings().parsedMappings);
    TypeMappings mergedTextTypes = new TypeMappings(textTypes);

    List<TypeMapping> relationTypes =
            new ArrayList<TypeMapping>(mapping1.getRelationTypeMapppings().parsedMappings);
    appendMissing(relationTypes, mapping2.getRelationTypeMapppings().parsedMappings);
    TypeMappings mergedRelationTypes = new TypeMappings(relationTypes);

    // AD: Span mappings of mapping2 are deliberately NOT appended here; doing so
    // makes test1mapping fail.
    List<SpanMapping> spans = new ArrayList<SpanMapping>(mapping1.textAnnotations.values());

    List<RelationMapping> relationMappings =
            new ArrayList<RelationMapping>(mapping1.relations.values());
    appendMissing(relationMappings, mapping2.relations.values());

    List<CommentMapping> commentMappings =
            new ArrayList<CommentMapping>(mapping1.comments.values());
    appendMissing(commentMappings, mapping2.comments.values());

    return new Mapping(mergedTextTypes, mergedRelationTypes, spans, relationMappings,
            commentMappings);
}

/**
 * Appends, in iteration order, every candidate that {@code target} does not yet contain.
 */
private static <T> void appendMissing(List<T> target, Iterable<? extends T> candidates) {
    for (T candidate : candidates) {
        if (!target.contains(candidate)) {
            target.add(candidate);
        }
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ public class TypeMapping
private static final String BRAT = "BRAT";
private static final String UIMA = "UIMA";

private final Pattern bratTypePattern;
private final String uimaType;
private final Map<String, String> defaultFeatureValues;
public final Pattern bratTypePattern;
public final String uimaType;
public final Map<String, String> defaultFeatureValues;

private Matcher matcher;

Expand All @@ -52,6 +52,10 @@ public TypeMapping(
defaultFeatureValues = aDefaults != null ? aDefaults : Collections.emptyMap();
}

/**
 * No-argument constructor that delegates to the three-argument constructor with a
 * {@code null} pattern, a {@code null} replacement and an empty default-feature map.
 * <p>
 * NOTE(review): presumably added so the class can be instantiated by a JSON
 * deserializer; whether the three-argument constructor tolerates a {@code null}
 * pattern is not visible here - confirm.
 */
public TypeMapping() {
this(null, null, Collections.emptyMap());
}

public TypeMapping(String aPattern, String aReplacement)
{
this(aPattern, aReplacement, Collections.emptyMap());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,27 +30,36 @@

public class TypeMappings
{
private final List<TypeMapping> parsedMappings;
private final Map<String, Type> brat2UimaMappingCache;
private final Map<String, String> uima2BratMappingCache;
public List<TypeMapping> parsedMappings;
private Map<String, Type> brat2UimaMappingCache;
private Map<String, String> uima2BratMappingCache;

@JsonCreator
public TypeMappings(List<TypeMapping> aMappings)
public TypeMappings(List<TypeMapping> typeMappingList)
{
parsedMappings = aMappings;
brat2UimaMappingCache = new HashMap<>();
uima2BratMappingCache = new HashMap<>();
initTypeMappings(typeMappingList);
}

public TypeMappings(String... aMappings)
{
parsedMappings = new ArrayList<>();

List<TypeMapping> typeMappingList = new ArrayList<TypeMapping>();
if (aMappings != null) {
for (String m : aMappings) {
parsedMappings.add(TypeMapping.parse(m));
typeMappingList.add(TypeMapping.parse(m));
}
}

initTypeMappings(typeMappingList);
}

private void initTypeMappings(List<TypeMapping> typeMappingList) {
parsedMappings = new ArrayList<>();

if (typeMappingList != null) {
for(TypeMapping aMapping: typeMappingList) {
parsedMappings.add(aMapping);
}
}

brat2UimaMappingCache = new HashMap<>();
uima2BratMappingCache = new HashMap<>();
Expand Down Expand Up @@ -125,4 +134,7 @@ public String getBratType(Type aType)

return bratType;
}

/**
 * Currently a no-op: the body is empty, so {@code additionalMappings} is ignored and
 * this object is left unchanged.
 * <p>
 * NOTE(review): the name suggests the given mappings should be added to this
 * instance - confirm the intended behavior before relying on this method.
 *
 * @param additionalMappings
 *            not used by the current implementation
 */
public void append(TypeMappings additionalMappings) {
}
}