Skip to content

Commit

Permalink
eclipse-rdf4jGH-5058: additional parser code (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
barthanssens committed Jul 15, 2024
1 parent ed1e748 commit f40a74a
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,6 @@ private List<Value> getColumns(Model metadata, Resource tableSchema) throws RDFP
return RDFCollections.asValues(metadata, head.get(), new ArrayList<>());
}


/**
* Get "about" URL template, to be used to create the subject of the triples
*
Expand Down Expand Up @@ -392,7 +391,8 @@ private void parseCSV(Model metadata, RDFHandler handler, URI csvFile, CellParse
if (doReplace) {
values.put("{_col}", Long.toString(i));
}
handleStatement(handler, cellParsers[i], null, aboutSubject, values);
System.err.println("column: " + i);
handleStatement(handler, cellParsers[i], aboutSubject, values, needReplacement[i]);
}
line++;
}
Expand All @@ -415,10 +415,7 @@ private void handleStatement(RDFHandler handler, CellParser cellParser, String c
IRI predicate = cellParser.getPropertyIRI();
Resource o = cellParser.getValueUrl(cell);

Statement stmt = Statements.statement((s != null) ? s : aboutSubject,
predicate,
(o != null) ? o : val,
null);
Statement stmt = Statements.statement((s != null) ? s : aboutSubject, predicate, (o != null) ? o : val, null);
handler.handleStatement(stmt);
}

Expand All @@ -432,15 +429,32 @@ private void handleStatement(RDFHandler handler, CellParser cellParser, String c
*/
private void handleStatement(RDFHandler handler, CellParser cellParser, String cell, Resource aboutSubject,
		Map<String, String> values) {
	// Subject: the about URL built by the cell parser from the placeholder values;
	// the caller-supplied aboutSubject is used when the parser returns none.
	Resource s = cellParser.getAboutUrl(values);
	IRI predicate = cellParser.getPropertyIRI();

	// Object: the value URL when the parser returns one, otherwise the parsed cell value.
	Value o = cellParser.getValueUrl(values, cell);
	if (o == null) {
		o = cellParser.parse(cell);
	}

	Statement stmt = Statements.statement((s != null) ? s : aboutSubject, predicate, o, null);
	handler.handleStatement(stmt);
}

/**
 * Generate a statement without a cell value and pass it to the handler. The object of the statement is always
 * taken from the value URL of the cell parser.
 *
 * @param handler          handler receiving the generated statement
 * @param cellParser       cell parser providing the about URL, the property IRI and the value URL
 * @param aboutSubject     subject to use when the cell parser does not return an about URL
 * @param values           placeholder values passed to the cell parser when building the URLs
 * @param needsReplacement when true the value URL is built using the placeholder values, otherwise the value URL is
 *                         requested without them
 */
private void handleStatement(RDFHandler handler, CellParser cellParser, Resource aboutSubject,
		Map<String, String> values, boolean needsReplacement) {
	Resource s = cellParser.getAboutUrl(values);
	IRI predicate = cellParser.getPropertyIRI();
	// No cell value is available here, hence the null cell argument in both variants.
	// NOTE(review): getValueUrl has a path that returns null, in which case o is null here - confirm that
	// callers only use this overload for columns that define a value URL.
	Resource o = (needsReplacement) ? cellParser.getValueUrl(values, null) : cellParser.getValueUrl(null);

	Statement stmt = Statements.statement((s != null) ? s : aboutSubject, predicate, o, null);
	handler.handleStatement(stmt);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,11 @@
*******************************************************************************/
package org.eclipse.rdf4j.rio.csvw;

import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Optional;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
Expand All @@ -28,6 +26,9 @@
import org.eclipse.rdf4j.rio.csvw.parsers.CellParser;
import org.eclipse.rdf4j.rio.csvw.parsers.CellParserFactory;

import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;

/**
* Utility class, mostly about configuring the reader based on the JSON-LD metadata
Expand Down Expand Up @@ -112,7 +113,6 @@ private static IRI getDatatypeIRI(Model metadata, Resource column) {
return CoreDatatype.XSD.valueOf(datatype.stringValue().toUpperCase()).getIri();
}


/**
* Get format string, e.g. date format
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -193,14 +194,19 @@ private String[] getPlaceholders(String template) {
Matcher matcher = PLACEHOLDERS.matcher(template);
String ownPlaceholder = getOwnPlaceholder(template);

if (matcher.find()) {
Set<String> placeholders = matcher.results()
.map(m -> m.group())
.filter(m -> !m.equals(ownPlaceholder))
.collect(Collectors.toSet());
return placeholders.toArray(new String[placeholders.size()]);
Set<String> placeholders = matcher.results()
.map(MatchResult::group)
.filter(m -> !m.equals(ownPlaceholder))
.collect(Collectors.toSet());
System.err.println("placeholders " + placeholders);
System.err.println("own placeholders " + ownPlaceholder);

if (placeholders.isEmpty()) {
System.err.println("no placeholder for " + template);
return null;
}
return null;
return placeholders.toArray(new String[placeholders.size()]);

}

/**
Expand All @@ -215,7 +221,7 @@ public IRI getAboutUrl(String cell) {
}
String s = aboutUrl;
if (aboutPlaceholder != null && cell != null) {
s = aboutUrl.replace(encodedName, getValueOrDefault(cell));
s = aboutUrl.replace(aboutPlaceholder, getValueOrDefault(cell));
}
return Values.iri(s);
}
Expand Down Expand Up @@ -298,7 +304,8 @@ public IRI getValueUrl(String cell) {
}
String s = valueUrl;
if (valuePlaceholder != null && cell != null) {
s = valueUrl.replace(encodedName, getValueOrDefault(cell));
System.err.println("repace " + valuePlaceholder + " " + cell);
s = valueUrl.replace(valuePlaceholder, getValueOrDefault(cell));
}
return Values.iri(s);
}
Expand All @@ -315,7 +322,7 @@ public IRI getValueUrl(Map<String, String> values, String cell) {
return null;
}
String s = valueUrl;
if (valuePlaceholder != null) {
if (valuePlaceholder != null && cell != null) {
s = valueUrl.replace(encodedName, getValueOrDefault(cell));
}
for (String val : valuePlaceholders) {
Expand All @@ -331,7 +338,9 @@ public IRI getValueUrl(Map<String, String> values, String cell) {
*/
public void setValueUrl(String valueUrl) {
	this.valueUrl = valueUrl;
	// check if this URL contains column placeholders:
	// first the placeholder returned by getOwnPlaceholder for this template ...
	this.valuePlaceholder = getOwnPlaceholder(valueUrl);
	// ... then the remaining placeholders (getPlaceholders filters out the own placeholder
	// and returns null when nothing else is left)
	this.valuePlaceholders = getPlaceholders(valueUrl);
}

Expand Down

0 comments on commit f40a74a

Please sign in to comment.