[ENH] Change assessment tool availability heuristic to any() (#234)

* switch to any-or-none check of assessment tool availability
* update expected assessment availability in test fixtures
* rename utility function
neurobagel · Nov 10, 2023 · dbcff19 · dbcff19
1 parent f71873b
commit dbcff19
Show file tree

Hide file tree

Showing 4 changed files with 21 additions and 16 deletions.
diff --git a/bagel/cli.py b/bagel/cli.py
@@ -132,10 +132,12 @@ def pheno(
             _assessments = [
                 models.Assessment(identifier=tool)
                 for tool, columns in tool_mapping.items()
-                if putil.are_not_missing(columns, _sub_pheno, data_dictionary)
+                if putil.are_any_available(
+                    columns, _sub_pheno, data_dictionary
+                )
             ]
             if _assessments:
-                # Only set assignments for the subject if at least one is not missing
+                # Only set assessments for the subject if at least one has a non-missing item
                 subject.hasAssessment = _assessments
 
         subject_list.append(subject)

diff --git a/bagel/pheno_utils.py b/bagel/pheno_utils.py
@@ -268,16 +268,14 @@ def get_mismatched_categorical_levels(data_dict: dict) -> list:
     return mismatched_cols
 
 
-def are_not_missing(columns: list, row: pd.Series, data_dict: dict) -> bool:
+def are_any_available(columns: list, row: pd.Series, data_dict: dict) -> bool:
     """
-    Checks that all values in the specified columns are not missing values. This is mainly useful
-    to determine the availability of an assessment tool
+    Checks that at least one of the values in the specified columns is not a missing value.
+    This is mainly useful to determine the availability of an assessment tool
     """
-    return all(
-        [
-            not is_missing_value(value, column, data_dict)
-            for column, value in row[columns].items()
-        ]
+    return any(
+        not is_missing_value(value, column, data_dict)
+        for column, value in row[columns].items()
     )
 
 

diff --git a/bagel/tests/test_cli_pheno.py b/bagel/tests/test_cli_pheno.py
@@ -438,9 +438,12 @@ def test_controlled_term_classes_have_uri_type(
 
 
 @pytest.mark.parametrize(
-    "assessment, subject",
+    "assessment, subject_idx",
     [
-        (None, 0),
+        (
+            [{"identifier": "cogatlas:1234", "schemaKey": "Assessment"}],
+            0,
+        ),
         (None, 1),
         (
             [
@@ -457,7 +460,7 @@ def test_assessment_data_are_parsed_correctly(
     default_pheno_output_path,
     load_test_json,
     assessment,
-    subject,
+    subject_idx,
 ):
     runner.invoke(
         bagel,
@@ -476,7 +479,7 @@ def test_assessment_data_are_parsed_correctly(
 
     pheno = load_test_json(default_pheno_output_path)
 
-    assert assessment == pheno["hasSamples"][subject].get("hasAssessment")
+    assert assessment == pheno["hasSamples"][subject_idx].get("hasAssessment")
 
 
 @pytest.mark.parametrize(

diff --git a/bagel/tests/test_utility.py b/bagel/tests/test_utility.py
@@ -212,7 +212,7 @@ def test_missing_values(value, column, expected):
 
 @pytest.mark.parametrize(
     "subject_idx, is_avail",
-    [(0, False), (2, False), (4, True)],
+    [(0, True), (2, False), (4, True)],
 )
 def test_get_assessment_tool_availability(
     test_data, load_test_json, subject_idx, is_avail
@@ -226,7 +226,9 @@ def test_get_assessment_tool_availability(
     test_columns = ["tool_item1", "tool_item2"]
 
     assert (
-        putil.are_not_missing(test_columns, pheno.iloc[subject_idx], data_dict)
+        putil.are_any_available(
+            test_columns, pheno.iloc[subject_idx], data_dict
+        )
         is is_avail
     )