Finish polyDB importer
eloiferrer committed Jun 12, 2023
1 parent 99e04fb commit 756454c
Showing 6 changed files with 201 additions and 75 deletions.
2 changes: 1 addition & 1 deletion src/mardi_importer/importer/Importer.py
@@ -23,7 +23,7 @@ def import_all(self):
"""
Manages the import process.
"""
#self.dataSource.setup()
self.dataSource.setup()
self.dataSource.pull()
self.dataSource.push()

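With setup() re-enabled, import_all() runs the data source's full setup/pull/push cycle. A minimal sketch of how the polyDB import might be driven end to end; the Importer constructor signature and the module paths are assumptions inferred from the file layout rather than anything shown in this commit:

    # Hypothetical driver; assumes Importer stores its data source as self.dataSource,
    # matching the attribute used in import_all() above.
    from mardi_importer.importer.Importer import Importer
    from mardi_importer.polydb.PolyDBSource import PolyDBSource

    source = PolyDBSource()      # fills the default collection_list in __post_init__
    importer = Importer(source)  # assumed constructor
    importer.import_all()        # setup() -> pull() -> push()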
4 changes: 2 additions & 2 deletions src/mardi_importer/polydb/ArxivPublication.py
@@ -248,7 +248,7 @@ def create(self):
value="scientific article from arXiv"
)

# Instance of: scholary article
# Instance of: scholarly article
item.add_claim('wdt:P31','wd:Q13442814')

# Publication date
@@ -292,7 +292,7 @@ def create(self):
        # DOI
        doi = '10.48550/arXiv.' + self.arxiv_id
        item.add_claim('wdt:P356', doi)

        self.QID = item.write().id

        if self.QID:
123 changes: 119 additions & 4 deletions src/mardi_importer/polydb/Collection.py
@@ -18,7 +18,63 @@ class GenericReference():
    attributes: str

    def create(self):
        pass
        item = self.api.item.new()
        item.labels.set(language="en", value=self.title)
        item.descriptions.set(
            language="en",
            value="scientific article"
        )

        exists = item.is_instance_of('wd:Q13442814')
        if exists: return exists

        # Instance of: scholarly article
        item.add_claim('wdt:P31','wd:Q13442814')

        for author in self.authors:
            item.add_claim('wdt:P50', author.QID)

        conference = None
        if self.attributes == "KyotoCGGT2007, 2007/6/11-15":
            conference = self.api.import_entities('Q106338712')
        elif self.attributes == "Kyoto RIMS Workshop on Computational Geometry and Discrete Mathematics, RIMS, Kyoto University, 2008/10/16":
            conf_item = self.api.item.new()
            conf_item.labels.set(language="en", value="Kyoto RIMS Workshop on Computational Geometry and Discrete Mathematics")
            conf_item.descriptions.set(
                language="en",
                value="academic conference"
            )
            conference = conf_item.exists()
            if not conference:
                conf_item.add_claim('wdt:P31', 'wd:Q2020153')
                conf_item.add_claim('wdt:P1476', 'Kyoto RIMS Workshop on Computational Geometry and Discrete Mathematics')
                conf_item.add_claim('wdt:P17', 'wd:Q17')
                conf_item.add_claim('wdt:P276', 'wd:Q34600')
                location = self.api.import_entities('Q840667')
                conf_item.add_claim('wdt:P276', location)
                conf_item.add_claim('wdt:P921', 'wd:Q874709')
                conf_item.add_claim('wdt:P921', 'wd:Q121416')
                conf_item.add_claim('wdt:P580', time="+2007-10-16T00:00:00Z")
                conf_item.add_claim('wdt:P582', time="+2007-10-18T00:00:00Z")
                conference = conf_item.write().id
        elif self.attributes == "PhD Thesis, Aarhus 2007":
            item.descriptions.set(
                language="en",
                value="doctoral thesis"
            )
            item.add_claim('wdt:P31', 'wd:Q187685')
            location = self.api.import_entities('Q924265')
            item.add_claim('wdt:P4101', location)
            item.add_claim('wdt:P577', time="+2007-00-00T00:00:00Z")
        else:
            print('The following conference or attribute cannot be parsed:')
            print(self.attributes)
            print('----------------------------------')

        if conference:
            item.add_claim('wdt:P5072', conference)

        return item.write().id

@dataclass
class Collection():
@@ -47,11 +103,59 @@ def __post_init__(self):
        self.parse_references(json_data)
        self.fill_author_pool()

        self.item = self.api.item.new()
        self.item.labels.set(language="en", value=self.label)

        # PolyDB QID (Instance of: collection)
        item = self.api.item.new()
        item.labels.set(language="en", value="polyDB collection")
        self.poly_db_qid = item.is_instance_of('wd:Q2668072')

        # Contributed by
        self.contributed_by_pid = self.api.get_local_id_by_label(
            'contributed by',
            'property')

    def exists(self):
        pass
        return self.item.is_instance_of(self.poly_db_qid)

    def create(self):
        pass
        self.item.descriptions.set(
            language="en",
            value=self.description
        )

        # Instance of: PolyDB Collection
        self.item.add_claim("wdt:P31", self.poly_db_qid)

        # Author
        for author in self.authors:
            self.item.add_claim("wdt:P50", author.QID)

        # Maintainer
        for maintainer in self.maintainer:
            self.item.add_claim("wdt:P126", maintainer.QID)

        # Contributor
        for contributor in self.contributor:
            self.item.add_claim(self.contributed_by_pid, contributor.QID)

        # Publications
        for publications in [self.arxiv,
                             self.crossref,
                             self.generic_references]:
            for publication in publications:
                pub_qid = publication.create()
                self.item.add_claim('wdt:P2860', pub_qid)

        # Data source
        for source, url in self.data:
            if source == "url":
                self.item.add_claim('wdt:P1325', url)
            elif source == "github":
                self.item.add_claim('wdt:P1324', url)

        self.item.write()

    def update(self):
        pass
@@ -154,12 +258,23 @@ def create_reference(self, ref):
        return ('reference', reference)

    def fill_author_pool(self):
        self.author_pool.extend(self.authors)
        self.author_pool.extend(self.maintainer)
        self.author_pool.extend(self.contributor)
        for ref in self.arxiv:
            self.author_pool.extend(ref.authors)
        for ref in self.crossref:
            self.author_pool.extend(ref.authors)
        for ref in self.generic_references:
            self.author_pool.extend(ref.authors)

    def update_authors(self, polydb_authors):
        authors_list = [self.authors, self.contributor, self.maintainer]
        for lst in authors_list:
            for author in lst:
                author.pull_QID(polydb_authors)


        for publications in [self.arxiv, self.crossref, self.generic_references]:
            for publication in publications:
                for author in publication.authors:
                    author.pull_QID(polydb_authors)
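For orientation, a minimal usage sketch of GenericReference.create(); the keyword arguments are inferred from the fields the method reads (api, title, authors, attributes), so treat the constructor signature, the title, and the empty author list as assumptions:

    # Hypothetical example; Author objects are assumed to already carry a resolved QID.
    api = MardiIntegrator()
    ref = GenericReference(
        api=api,
        title="Example thesis title",         # placeholder, not a real reference
        authors=[],                           # list of Author objects with .QID set
        attributes="PhD Thesis, Aarhus 2007",
    )
    qid = ref.create()  # returns the QID of the newly written or already existing item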
130 changes: 62 additions & 68 deletions src/mardi_importer/polydb/PolyDBSource.py
@@ -10,8 +10,6 @@
import time
import json
import os
import logging
#log = logging.getLogger('CRANlogger')

@dataclass
class PolyDBSource(ADataSource):
@@ -26,34 +24,58 @@ class PolyDBSource(ADataSource):
    update: bool = False
    integrator: MardiIntegrator = MardiIntegrator()
    collection_list: List[str] = field(default_factory=list)
    author_pool: List[Author] = field(default_factory=list)
    polydb_authors: List[Author] = field(default_factory=list)
    collections: List[Collection] = field(default_factory=list)

    def __post_init__(self):
        self.collection_list = ["Manifolds.DIM2_3",
                                "Matroids.SelfDual",
                                "Matroids.Small"]
        #self.collection_list = ["Manifolds.DIM2_3",
        #                        "Matroids.SelfDual",
        #                        "Matroids.Small",
        #                        "Polytopes.Combinatorial.01Polytopes",
        #                        "Polytopes.Combinatorial.CombinatorialTypes",
        #                        "Polytopes.Combinatorial.FacesBirkhoffPolytope",
        #                        "Polytopes.Combinatorial.SmallSpheresDim4",
        #                        "Polytopes.Geometric.01Polytopes",
        #                        "Polytopes.Lattice.01Polytopes",
        #                        "Polytopes.Lattice.ExceptionalMaximalHollow",
        #                        "Polytopes.Lattice.FewLatticePoints3D",
        #                        "Polytopes.Lattice.NonSpanning3D",
        #                        "Polytopes.Lattice.Panoptigons",
        #                        "Polytopes.Lattice.Reflexive",
        #                        "Polytopes.Lattice.SmallVolume",
        #                        "Polytopes.Lattice.SmoothReflexive",
        #                        "Tropical.Cubics",
        #                        "Tropical.Polytropes",
        #                        "Tropical.QuarticCurves",
        #                        "Tropical.SchlaefliFan",
        #                        "Tropical.TOM"]
                                "Matroids.Small",
                                "Polytopes.Combinatorial.01Polytopes",
                                "Polytopes.Combinatorial.CombinatorialTypes",
                                "Polytopes.Combinatorial.FacesBirkhoffPolytope",
                                "Polytopes.Combinatorial.SmallSpheresDim4",
                                "Polytopes.Geometric.01Polytopes",
                                "Polytopes.Lattice.01Polytopes",
                                "Polytopes.Lattice.ExceptionalMaximalHollow",
                                "Polytopes.Lattice.FewLatticePoints3D",
                                "Polytopes.Lattice.NonSpanning3D",
                                "Polytopes.Lattice.Panoptigons",
                                "Polytopes.Lattice.Reflexive",
                                "Polytopes.Lattice.SmallVolume",
                                "Polytopes.Lattice.SmoothReflexive",
                                "Tropical.Cubics",
                                "Tropical.Polytropes",
                                "Tropical.QuarticCurves",
                                "Tropical.SchlaefliFan",
                                "Tropical.TOM"]

    def setup(self):
        """Create all necessary properties for polyDB
        """
        filepath = os.path.realpath(os.path.dirname(__file__))

        filename = filepath + "/wikidata_entities.txt"
        self.integrator.import_entities(filename=filename)

        filename = filepath + "/new_entities.json"
        f = open(filename)
        entities = json.load(f)

        for prop_element in entities['properties']:
            prop = self.integrator.property.new()
            prop.labels.set(language='en', value=prop_element['label'])
            prop.descriptions.set(language='en', value=prop_element['description'])
            prop.datatype = prop_element['datatype']
            if not prop.exists(): prop.write()

        for item_element in entities['items']:
            item = self.integrator.item.new()
            item.labels.set(language='en', value=item_element['label'])
            item.descriptions.set(language='en', value=item_element['description'])
            for key, value in item_element['claims'].items():
                item.add_claim(key,value=value)
            if not item.exists(): item.write()

        # author_tuple = (name, orcid, arxiv_id, affiliation_qid, wikidata_qid)
        author_tuples = [('Frank Lutz', '', '', 'wd:Q51985', 'wd:Q102201447'),
@@ -85,66 +107,38 @@ def __post_init__(self):
                         ('Marta Panizzut', '0000-0001-8631-6329', '', 'wd:Q51985', 'wd:Q102782302'),
                         ('Silke Horn', '', '', 'wd:Q310695', 'wd:Q102398539')]

        for name, orcid, arxiv_id, affiliation, qid in author_tuples:
        for name, orcid, arxiv_id, affiliation, QID in author_tuples:
            author = Author(self.integrator, name, orcid, arxiv_id, affiliation)
            if qid:
                qid = qid.split(':')[1]
                author.qid = self.integrator.import_entities(qid)
            self.author_pool.append(author)


    def setup(self):
        """Create all necessary properties for polyDB
        """
        filepath = os.path.realpath(os.path.dirname(__file__))

        filename = filepath + "/wikidata_entities.txt"
        self.integrator.import_entities(filename=filename)

        filename = filepath + "/new_entities.json"
        f = open(filename)
        entities = json.load(f)

        for prop_element in entities['properties']:
            prop = self.integrator.property.new()
            prop.labels.set(language='en', value=prop_element['label'])
            prop.descriptions.set(language='en', value=prop_element['description'])
            prop.datatype = prop_element['datatype']
            if not prop.exists(): prop.write()

        for item_element in entities['items']:
            item = self.integrator.item.new()
            item.labels.set(language='en', value=item_element['label'])
            item.descriptions.set(language='en', value=item_element['description'])
            for key, value in item_element['claims'].items():
                item.add_claim(key,value=value)
            if not item.exists(): item.write()
            if QID:
                QID = QID.split(':')[1]
                author.QID = self.integrator.import_entities(QID)
            self.polydb_authors.append(author)

    def get_collection_list(self):
        pass

    def pull(self):
        # Do not pull collections that already exist
        if self.update:
            self.get_collection_list()
        for name in self.collection_list:
            print(name)
            self.collections.append(Collection(name))
            time.sleep(3)


    def push(self):
        for collection in self.collections:
            self.polydb_authors += collection.author_pool

        #for collection in self.collections:
        #    if not collection.exists():
        #        collection.create()
        #    if self.update:
        #        collection.update()
        self.polydb_authors = Author.disambiguate_authors(self.polydb_authors)

        # Ignoring collections that already exist should be done in pull()
        for collection in self.collections:
            for author in collection.authors:
                self.author_pool.extend(collection.author_pool)

        Author.disambiguate_authors(self.author_pool)
            collection.update_authors(self.polydb_authors)
            if not collection.exists():
                collection.create()
            if self.update:
                collection.update()
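push() first merges every collection's author pool into self.polydb_authors, lets Author.disambiguate_authors collapse duplicates, and only then pushes QIDs back into each collection via update_authors before writing. A rough illustration of the contract this code appears to rely on; disambiguate_authors is defined elsewhere and its real matching logic is not part of this diff:

    # Purely illustrative stand-in for the assumed contract of Author.disambiguate_authors:
    # collapse duplicate Author objects and return one canonical list.
    def disambiguate_authors(authors):
        canonical = {}
        for author in authors:
            key = author.orcid or author.name  # assumed matching keys
            canonical.setdefault(key, author)
        return list(canonical.values())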



1 change: 1 addition & 0 deletions src/mardi_importer/polydb/new_entities.json
@@ -11,6 +11,7 @@
"label": "polyDB collection",
"description": "Object in discrete geometry that can be accessed from within polymake",
"claims": {
"wdt:P31": "wd:Q2668072",
"wdt:P921": "wd:Q906377",
"wdt:P856": "https://polydb.org/"
}
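The setup() method in PolyDBSource.py loads this file with json.load and iterates over two top-level keys. A sketch of the shape it expects, inferred from that loop; the 'contributed by' property entry is an assumption (Collection looks that property up by label, but its JSON entry is not shown in this diff), while the 'polyDB collection' item entry is taken verbatim from the lines above:

    # Illustrative shape of new_entities.json as consumed by PolyDBSource.setup().
    import json

    with open("new_entities.json") as f:
        entities = json.load(f)

    # entities == {
    #     "properties": [{"label": "contributed by",          # assumed entry
    #                     "description": "...",
    #                     "datatype": "wikibase-item"}],      # assumed datatype
    #     "items": [{"label": "polyDB collection",
    #                "description": "Object in discrete geometry that can be accessed from within polymake",
    #                "claims": {"wdt:P31": "wd:Q2668072",
    #                           "wdt:P921": "wd:Q906377",
    #                           "wdt:P856": "https://polydb.org/"}}]
    # }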