diff --git a/mardi_importer/mardi_importer/openml/OpenMLDataset.py b/mardi_importer/mardi_importer/openml/OpenMLDataset.py index 457a5ed..235d110 100644 --- a/mardi_importer/mardi_importer/openml/OpenMLDataset.py +++ b/mardi_importer/mardi_importer/openml/OpenMLDataset.py @@ -41,6 +41,7 @@ def __init__( integrator, name, dataset_id, + description, version, creators, contributors, @@ -67,6 +68,7 @@ def __init__( self.api = integrator self.name = name #done self.dataset_id = str(dataset_id) #done + self.description=description self.version = version #done self.creators = creators self.contributors = contributors @@ -110,6 +112,9 @@ def insert_claims(self): if self.version is not None and self.version != "None": prop_nr = self.api.get_local_id_by_label("dataset version identifier", "property") self.item.add_claim(prop_nr, str(self.version)) + if self.description is not None: + prop_nr = self.api.get_local_id_by_label("description", "property") + self.item.add_claim(prop_nr, str(self.description)) if self.creators and self.creators != "None": #object has role qualifier = [self.api.get_claim("wdt:P3831", "wd:Q59275219")] diff --git a/mardi_importer/mardi_importer/openml/OpenMLSource.py b/mardi_importer/mardi_importer/openml/OpenMLSource.py index 8fbb426..d25cdc0 100644 --- a/mardi_importer/mardi_importer/openml/OpenMLSource.py +++ b/mardi_importer/mardi_importer/openml/OpenMLSource.py @@ -48,7 +48,7 @@ def create_local_entities(self): item.write() def pull(self): - dataset_dict = {"name": [], "dataset_id": [], "version": [], "creators": [], + dataset_dict = {"name": [], "dataset_id": [], "description":[], "version": [], "creators": [], "contributors" : [], "collection_date": [], "upload_date": [], "license": [], "url":[], "default_target_attribute":[], "row_id_attribute":[], "tags":[], "original_data_url":[], "paper_url":[], @@ -65,6 +65,7 @@ def pull(self): ds = openml.datasets.get_dataset(int(did), download_data=False, download_qualities=False, download_features_meta_data=False) dataset_dict["name"].append(ds.name) dataset_dict["dataset_id"].append(did) + dataset_dict["description"].append(ds.description) dataset_dict["version"].append(ds.version) dataset_dict["creators"].append(ds.creator) dataset_dict["contributors"].append(ds.contributor) @@ -78,6 +79,7 @@ def pull(self): dataset_dict["original_data_url"].append(ds.original_data_url) dataset_dict["paper_url"].append(ds.paper_url) dataset_dict["md5_checksum"].append(ds.md5_checksum) + dataset_dict["format"].append(ds.format) try: qualities = ds.qualities except: diff --git a/mardi_importer/mardi_importer/openml/new_entities.json b/mardi_importer/mardi_importer/openml/new_entities.json index 4078405..4e7c38c 100644 --- a/mardi_importer/mardi_importer/openml/new_entities.json +++ b/mardi_importer/mardi_importer/openml/new_entities.json @@ -5,6 +5,11 @@ "description": "Version of a dataset", "datatype": "string" }, + { + "label": "description", + "description": "long description of an item", + "datatype": "string" + }, { "label": "collection date", "description": "date as a string", diff --git a/mardi_importer/mardi_importer/openml/wikidata_entities.txt b/mardi_importer/mardi_importer/openml/wikidata_entities.txt index 414437b..0680e46 100644 --- a/mardi_importer/mardi_importer/openml/wikidata_entities.txt +++ b/mardi_importer/mardi_importer/openml/wikidata_entities.txt @@ -14,4 +14,4 @@ P459 P11238 P3831 Q59275219 -Q20204892 \ No newline at end of file +Q20204892