From e5f3a8410d793d63cdda2933647722633c637501 Mon Sep 17 00:00:00 2001
From: Rob Court
Date: Wed, 23 Oct 2024 00:01:48 +0100
Subject: [PATCH] removing duplicates

---
NOTE(review): the added dedup line reads self.edge_writer.statements, while
the commit call (before and after this change) uses self.edge_witer. Confirm
which attribute name the class actually defines; if only one of the two
spellings exists, the other access will raise AttributeError.

 .../ebi/vfb/neo4j/flybase2neo/add_refs_for_anat.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/uk/ac/ebi/vfb/neo4j/flybase2neo/add_refs_for_anat.py b/src/uk/ac/ebi/vfb/neo4j/flybase2neo/add_refs_for_anat.py
index a44f7c65..6991f52c 100644
--- a/src/uk/ac/ebi/vfb/neo4j/flybase2neo/add_refs_for_anat.py
+++ b/src/uk/ac/ebi/vfb/neo4j/flybase2neo/add_refs_for_anat.py
@@ -190,8 +190,15 @@ def gen_pub_links(self):
                 self.write_pub_link(d['short_form'], s, type=k)
 
     def commit(self):
-        self.node_writer.commit(chunk_length=1000, verbose=True)
-        self.edge_witer.commit(chunk_length=1000, verbose=True)
+        # Remove duplicates from the node writer statements
+        self.node_writer.statements = list(set(self.node_writer.statements))
+
+        # Remove duplicates from the edge writer statements
+        self.edge_writer.statements = list(set(self.edge_writer.statements))
+
+        # Commit the unique statements
+        self.node_writer.commit(chunk_length=1000, verbose=True)
+        self.edge_witer.commit(chunk_length=1000, verbose=True)
 
 def __main__():
     pl = pubLink(endpoint=args.endpoint,
@@ -201,4 +208,4 @@ def __main__():
     pl.gen_pub_links()
     pl.commit()
 
-__main__()
\ No newline at end of file
+__main__()