VirtualFlyBrain · Clare72 · Aug 16, 2023 · Aug 15, 2023 · Aug 16, 2023
diff --git a/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py b/src/uk/ac/ebi/vfb/neo4j/flybase2neo/feature_tools.py
@@ -148,9 +148,12 @@ def add_features(self, fbids, commit=True):
         self.addTypes2Neo(fbids=fbids, commit=commit)
         return feats
 
-    def feature_robot_template(self, fbids, filename):
+    def feature_robot_template(self, fbids, filename, skip_missing=False):
         """Takes a list of FBids, looks up info (via name_synonym_lookup) and makes a robot template.
-        Output filename should be specified (template will be a tsv)."""
+        Output filename should be specified (template will be a tsv).
+
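+        By default (skip_missing=False) a KeyError listing any FBids that were not found is raised; if skip_missing is truthy these FBids are skipped and a warning is printed instead
+        - passing a filename (str) as skip_missing also writes the FBids that were not found to that file, one per line."""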
         feature_details = self.name_synonym_lookup(fbids)
         feature_types = dict(self.grossType(fbids))
 
@@ -159,17 +162,33 @@ def feature_robot_template(self, fbids, filename):
                                                  ("feature_type", "SC %"),
                                                  ("self_xref", "A http://n2o.neo/custom/self_xref")])
         template = pd.DataFrame.from_records([template_seed])
+        unmapped_FBgns = []
         for f in fbids:
-            row_od = collections.OrderedDict([])  # new template row as an empty ordered dictionary
-            for c in template.columns:  # make columns and blank data for new template row
-                row_od.update([(c, "")])
-            row_od["iri"] = feature_details[f].iri
-            row_od["label"] = feature_details[f].label
-            row_od["synonyms"] = '|'.join(feature_details[f].synonyms)
-            row_od["feature_type"] = "http://purl.obolibrary.org/obo/" + feature_types[f]
-            row_od["self_xref"] = "FlyBase"
-            new_row = pd.DataFrame.from_records([row_od])
-            template = pd.concat([template, new_row], ignore_index=True, sort=False)
+            if f in feature_details.keys():
+                row_od = collections.OrderedDict([])  # new template row as an empty ordered dictionary
+                for c in template.columns:  # make columns and blank data for new template row
+                    row_od.update([(c, "")])
+                row_od["iri"] = feature_details[f].iri
+                row_od["label"] = feature_details[f].label
+                row_od["synonyms"] = '|'.join(feature_details[f].synonyms)
+                row_od["feature_type"] = "http://purl.obolibrary.org/obo/" + feature_types[f]
+                row_od["self_xref"] = "FlyBase"
+                new_row = pd.DataFrame.from_records([row_od])
+                template = pd.concat([template, new_row], ignore_index=True, sort=False)
+            else:
+                unmapped_FBgns.append(f)
+
+        if len(unmapped_FBgns) > 0:
+            if not skip_missing:
+                raise KeyError(unmapped_FBgns)
+            elif type(skip_missing) == str:
+                print("WARNING - some FBgns not found (see file %s):" % skip_missing, unmapped_FBgns)
+                with open(skip_missing, 'w') as fw:
+                    for l in unmapped_FBgns:
+                        fw.write(l + '\n')
+            else:
+                print("WARNING - some FBgns not found:", unmapped_FBgns)
+
         template.to_csv(filename, sep="\t", header=True, index=False)