Skip to content

Commit

Permalink
[ENH] Change assessment tool availability heuristic to any() (#234)
Browse files Browse the repository at this point in the history
* switch to any-or-none check of assessment tool availability

* update expected assessment availability in test fixtures

* rename utility function
  • Loading branch information
alyssadai authored Nov 10, 2023
1 parent f71873b commit dbcff19
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 16 deletions.
6 changes: 4 additions & 2 deletions bagel/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,12 @@ def pheno(
_assessments = [
models.Assessment(identifier=tool)
for tool, columns in tool_mapping.items()
if putil.are_not_missing(columns, _sub_pheno, data_dictionary)
if putil.are_any_available(
columns, _sub_pheno, data_dictionary
)
]
if _assessments:
# Only set assignments for the subject if at least one is not missing
# Only set assessments for the subject if at least one has a non-missing item
subject.hasAssessment = _assessments

subject_list.append(subject)
Expand Down
14 changes: 6 additions & 8 deletions bagel/pheno_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,16 +268,14 @@ def get_mismatched_categorical_levels(data_dict: dict) -> list:
return mismatched_cols


def are_not_missing(columns: list, row: pd.Series, data_dict: dict) -> bool:
def are_any_available(columns: list, row: pd.Series, data_dict: dict) -> bool:
"""
Checks that all values in the specified columns are not missing values. This is mainly useful
to determine the availability of an assessment tool
Checks that at least one of the values in the specified columns is not a missing value.
This is mainly useful to determine the availability of an assessment tool
"""
return all(
[
not is_missing_value(value, column, data_dict)
for column, value in row[columns].items()
]
return any(
not is_missing_value(value, column, data_dict)
for column, value in row[columns].items()
)


Expand Down
11 changes: 7 additions & 4 deletions bagel/tests/test_cli_pheno.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,9 +438,12 @@ def test_controlled_term_classes_have_uri_type(


@pytest.mark.parametrize(
"assessment, subject",
"assessment, subject_idx",
[
(None, 0),
(
[{"identifier": "cogatlas:1234", "schemaKey": "Assessment"}],
0,
),
(None, 1),
(
[
Expand All @@ -457,7 +460,7 @@ def test_assessment_data_are_parsed_correctly(
default_pheno_output_path,
load_test_json,
assessment,
subject,
subject_idx,
):
runner.invoke(
bagel,
Expand All @@ -476,7 +479,7 @@ def test_assessment_data_are_parsed_correctly(

pheno = load_test_json(default_pheno_output_path)

assert assessment == pheno["hasSamples"][subject].get("hasAssessment")
assert assessment == pheno["hasSamples"][subject_idx].get("hasAssessment")


@pytest.mark.parametrize(
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/test_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def test_missing_values(value, column, expected):

@pytest.mark.parametrize(
"subject_idx, is_avail",
[(0, False), (2, False), (4, True)],
[(0, True), (2, False), (4, True)],
)
def test_get_assessment_tool_availability(
test_data, load_test_json, subject_idx, is_avail
Expand All @@ -226,7 +226,9 @@ def test_get_assessment_tool_availability(
test_columns = ["tool_item1", "tool_item2"]

assert (
putil.are_not_missing(test_columns, pheno.iloc[subject_idx], data_dict)
putil.are_any_available(
test_columns, pheno.iloc[subject_idx], data_dict
)
is is_avail
)

Expand Down

0 comments on commit dbcff19

Please sign in to comment.