From d58b29ad61ae20f817db16d9bba4b099a0bad0ca Mon Sep 17 00:00:00 2001 From: Clare72 Date: Tue, 15 Aug 2023 14:57:13 +0100 Subject: [PATCH 1/2] allow template to be made if some FBgns not found (with warning) --- .../vfb/neo4j/flybase2neo/feature_tools.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py b/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py index 86d415be..73c44c8a 100644 --- a/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py +++ b/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py @@ -159,18 +159,23 @@ def feature_robot_template(self, fbids, filename): ("feature_type", "SC %"), ("self_xref", "A http://n2o.neo/custom/self_xref")]) template = pd.DataFrame.from_records([template_seed]) + unmapped_FBgns = [] for f in fbids: - row_od = collections.OrderedDict([]) # new template row as an empty ordered dictionary - for c in template.columns: # make columns and blank data for new template row - row_od.update([(c, "")]) - row_od["iri"] = feature_details[f].iri - row_od["label"] = feature_details[f].label - row_od["synonyms"] = '|'.join(feature_details[f].synonyms) - row_od["feature_type"] = "http://purl.obolibrary.org/obo/" + feature_types[f] - row_od["self_xref"] = "FlyBase" - new_row = pd.DataFrame.from_records([row_od]) - template = pd.concat([template, new_row], ignore_index=True, sort=False) + if f in feature_details.keys(): + row_od = collections.OrderedDict([]) # new template row as an empty ordered dictionary + for c in template.columns: # make columns and blank data for new template row + row_od.update([(c, "")]) + row_od["iri"] = feature_details[f].iri + row_od["label"] = feature_details[f].label + row_od["synonyms"] = '|'.join(feature_details[f].synonyms) + row_od["feature_type"] = "http://purl.obolibrary.org/obo/" + feature_types[f] + row_od["self_xref"] = "FlyBase" + new_row = pd.DataFrame.from_records([row_od]) + template = pd.concat([template, new_row], ignore_index=True, sort=False) + else: + unmapped_FBgns.append(f) template.to_csv(filename, sep="\t", header=True, index=False) + print("WARNING - some FBgns not found:", unmapped_FBgns) # Typing From 7e8f66fa36461a1cd367cfa27f66c92929cad344 Mon Sep 17 00:00:00 2001 From: Clare72 Date: Wed, 16 Aug 2023 07:53:35 +0100 Subject: [PATCH 2/2] make skipping missing IDs optional (default False) --- .../vfb/neo4j/flybase2neo/feature_tools.py | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py b/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py index 73c44c8a..b80f0674 100644 --- a/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py +++ b/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py @@ -148,9 +148,12 @@ def add_features(self, fbids, commit=True): self.addTypes2Neo(fbids=fbids, commit=commit) return feats - def feature_robot_template(self, fbids, filename): + def feature_robot_template(self, fbids, filename, skip_missing=False): """Takes a list of FBids, looks up info (via name_synonym_lookup) and makes a robot template. - Output filename should be specified (template will be a tsv).""" + Output filename should be specified (template will be a tsv). + + Can optionally skip any fbid that is not found using skip_missing (default is to raise a KeyError) + - setting this as a filename will output a file of the FBids that were not found.""" feature_details = self.name_synonym_lookup(fbids) feature_types = dict(self.grossType(fbids)) @@ -174,8 +177,19 @@ def feature_robot_template(self, fbids, filename): template = pd.concat([template, new_row], ignore_index=True, sort=False) else: unmapped_FBgns.append(f) + + if len(unmapped_FBgns) > 0: + if not skip_missing: + raise KeyError(unmapped_FBgns) + elif type(skip_missing) == str: + print("WARNING - some FBgns not found (see file %s):" % skip_missing, unmapped_FBgns) + with open(skip_missing, 'w') as fw: + for l in unmapped_FBgns: + fw.write(l + '\n') + else: + print("WARNING - some FBgns not found:", unmapped_FBgns) + template.to_csv(filename, sep="\t", header=True, index=False) - print("WARNING - some FBgns not found:", unmapped_FBgns) # Typing