Skip to content

Commit

Permalink
refactor: colocate query and stats updates
Browse files Browse the repository at this point in the history
  • Loading branch information
fbiville committed Nov 26, 2024
1 parent 3d9fbf3 commit cf5a650
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 28 deletions.
36 changes: 9 additions & 27 deletions src/main/java/com/neo4j/data/importer/GedcomImporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,44 +38,26 @@ public Stream<Statistics> loadGedcom(@Name("file") String file) throws IOExcepti
var model = loadModel(filePath);

var dateParser = new Parser();
var personExtractors = new PersonExtractors(dateParser, model);
var statistics = new Statistics();
try (Transaction tx = db.beginTx()) {

var personExtractors = new PersonExtractors(dateParser, model);
model.getPeople().forEach(person -> {
var attributes = personExtractors.get().apply(person);
var personsStats = tx.execute("CREATE (i:Person) SET i = $attributes", Map.of("attributes", attributes))
var personExtractor = personExtractors.get();
var attributes = personExtractor.apply(person);
var personsStats = tx.execute(personExtractor.query(), Map.of("attributes", attributes))
.getQueryStatistics();

statistics.addNodesCreated(personsStats.getNodesCreated());
personExtractor.updateCounters(personsStats, statistics);
});

var familyExtractors = new FamilyExtractors(dateParser);
model.getFamilies().forEach(family -> {
var attributes = familyExtractors.get().apply(family);
var stats = tx.execute(
"""
UNWIND $spouseIdPairs AS spouseInfo
MATCH (spouse1:Person {id: spouseInfo.id1}),
(spouse2:Person {id: spouseInfo.id2})
CREATE (spouse1)-[r:SPOUSE_OF]->(spouse2)
FOREACH (marriageInfo IN spouseInfo.events["MARR"] |
CREATE (spouse1)-[r:MARRIED_TO]->(spouse2)
SET r = marriageInfo
)
FOREACH (divorceInfo IN spouseInfo.events["DIV"] |
CREATE (spouse1)-[r:DIVORCED]->(spouse2)
SET r = divorceInfo
)
WITH spouse1, spouse2
UNWIND $childIds AS childId
MATCH (child:Person {id: childId})
CREATE (child)-[:CHILD_OF]->(spouse1)
CREATE (child)-[:CHILD_OF]->(spouse2)
""",
attributes)
var familyExtractor = familyExtractors.get();
var familyStats = tx.execute(familyExtractor.query(), familyExtractor.apply(family))
.getQueryStatistics();

statistics.addRelationshipsCreated(stats.getRelationshipsCreated());
familyExtractor.updateCounters(familyStats, statistics);
});

tx.commit();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
package com.neo4j.data.importer.extractors;

import com.neo4j.data.importer.Statistics;
import java.util.Map;
import java.util.function.Function;
import org.neo4j.graphdb.QueryStatistics;

public interface AttributeExtractor<T> extends Function<T, Map<String, Object>> {}
/**
 * A function from a source element of type {@code T} to a map of named
 * attributes, paired with a query string and a hook for recording query
 * statistics.
 *
 * <p>NOTE(review): the map returned by {@code apply} is presumably meant to
 * supply the parameters of {@link #query()} — confirm against the caller.
 *
 * @param <T> type of the element attributes are extracted from
 */
public interface AttributeExtractor<T> extends Function<T, Map<String, Object>> {
/**
 * Returns the query string associated with this extractor.
 *
 * @return the query text
 */
String query();

/**
 * Transfers figures from one query execution's statistics into the given
 * counters. Which figures are transferred is up to the implementation.
 *
 * @param results statistics reported for a query execution
 * @param counters accumulator to update
 */
void updateCounters(QueryStatistics results, Statistics counters);
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
package com.neo4j.data.importer.extractors;

import com.neo4j.data.importer.Statistics;
import com.neo4j.data.importer.extractors.Lists.Pair;
import java.util.List;
import java.util.Map;
import org.folg.gedcom.model.Family;
import org.neo4j.graphdb.QueryStatistics;

public interface FamilyExtractor extends AttributeExtractor<Family> {

Expand All @@ -13,6 +15,28 @@ public interface FamilyExtractor extends AttributeExtractor<Family> {

List<String> childReferences(Family family);

/**
 * Returns the Cypher statement that links the members of a family.
 *
 * <p>The statement reads two parameters, {@code $spouseIdPairs} and
 * {@code $childIds}. For every spouse pair it matches both {@code Person}
 * nodes by {@code id}, creates a {@code SPOUSE_OF} relationship between
 * them, then creates one {@code MARRIED_TO} relationship per entry of
 * {@code events["MARR"]} and one {@code DIVORCED} relationship per entry of
 * {@code events["DIV"]}, copying the entry onto the relationship. Finally,
 * every child matched by id gets a {@code CHILD_OF} relationship to each of
 * the two spouses.
 *
 * <p>NOTE(review): the child relationships are created downstream of the
 * spouse {@code MATCH}, so they appear to be produced only for rows where
 * both spouses were found — confirm this is intended for families with a
 * single known spouse.
 *
 * @return the Cypher text
 */
default String query() {
    final String cypher = """
            UNWIND $spouseIdPairs AS spouseInfo
            MATCH (spouse1:Person {id: spouseInfo.id1}),
            (spouse2:Person {id: spouseInfo.id2})
            CREATE (spouse1)-[r:SPOUSE_OF]->(spouse2)
            FOREACH (marriageInfo IN spouseInfo.events["MARR"] |
            CREATE (spouse1)-[r:MARRIED_TO]->(spouse2)
            SET r = marriageInfo
            )
            FOREACH (divorceInfo IN spouseInfo.events["DIV"] |
            CREATE (spouse1)-[r:DIVORCED]->(spouse2)
            SET r = divorceInfo
            )
            WITH spouse1, spouse2
            UNWIND $childIds AS childId
            MATCH (child:Person {id: childId})
            CREATE (child)-[:CHILD_OF]->(spouse1)
            CREATE (child)-[:CHILD_OF]->(spouse2)
            """;
    return cypher;
}

default Map<String, Object> apply(Family family) {
var familyEvents = familyEvents(family);
var spouseInfo = spouseReferences(family).stream()
Expand All @@ -23,4 +47,8 @@ default Map<String, Object> apply(Family family) {
.toList();
return Map.of("spouseIdPairs", spouseInfo, "childIds", childReferences(family));
}

/**
 * Adds the number of relationships reported as created by a query execution
 * to the given counters.
 *
 * @param results statistics of the executed query
 * @param counters accumulator receiving the relationship count
 */
default void updateCounters(QueryStatistics results, Statistics counters) {
    var relationshipsCreated = results.getRelationshipsCreated();
    counters.addRelationshipsCreated(relationshipsCreated);
}
}
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package com.neo4j.data.importer.extractors;

import com.neo4j.data.importer.Statistics;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import org.folg.gedcom.model.Person;
import org.neo4j.graphdb.QueryStatistics;

interface PersonExtractor extends AttributeExtractor<Person> {

Expand All @@ -22,6 +24,10 @@ default Optional<String> preferredFirstName(Person person) {
return Optional.empty();
}

/**
 * Returns the Cypher statement that creates one {@code Person} node and
 * sets its properties from the {@code $attributes} parameter.
 *
 * @return the Cypher text
 */
default String query() {
    final String cypher = "CREATE (i:Person) SET i = $attributes";
    return cypher;
}

default Map<String, Object> apply(Person person) {
Map<String, Object> attributes = new HashMap<>(facts(person));
attributes.put("id", id(person));
Expand All @@ -31,4 +37,8 @@ default Map<String, Object> apply(Person person) {
preferredFirstName(person).ifPresent(gender -> attributes.put("preferred_first_name", gender));
return attributes;
}

/**
 * Adds the number of nodes reported as created by a query execution to the
 * given counters.
 *
 * @param results statistics of the executed query
 * @param counters accumulator receiving the node count
 */
default void updateCounters(QueryStatistics results, Statistics counters) {
    var nodesCreated = results.getNodesCreated();
    counters.addNodesCreated(nodesCreated);
}
}

0 comments on commit cf5a650

Please sign in to comment.