From b03e89fe81eb7f44e7a0f65f08f45654a1ae2200 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 24 Oct 2024 15:35:15 -0400 Subject: [PATCH] [FIX] Allow only `"true"` or `None` for `is_control` query parameter (#364) * make is_control a str and add custom validation * add explanatory comment * update docstring * update tests * update query template filter for is_control * test bool parsing of is_control query param * refactor query parameter validation * update test docstring --- app/api/models.py | 28 ++++++++++++++++++++++++++-- app/api/utility.py | 23 +++++++++++------------ tests/test_query.py | 24 ++++++++++++++++++++---- 3 files changed, 57 insertions(+), 18 deletions(-) diff --git a/app/api/models.py b/app/api/models.py index 27c6fa4..0d2b30d 100644 --- a/app/api/models.py +++ b/app/api/models.py @@ -5,7 +5,7 @@ from fastapi import Query from fastapi.exceptions import HTTPException -from pydantic import BaseModel, constr, root_validator +from pydantic import BaseModel, constr, root_validator, validator CONTROLLED_TERM_REGEX = r"^[a-zA-Z]+[:]\S+$" VERSION_REGEX = r"^([A-Za-z0-9-]+)\.(\d+)\.([A-Za-z0-9-]+)$" @@ -18,7 +18,7 @@ class QueryModel(BaseModel): max_age: float = Query(default=None, ge=0) sex: constr(regex=CONTROLLED_TERM_REGEX) = None diagnosis: constr(regex=CONTROLLED_TERM_REGEX) = None - is_control: bool = None + is_control: Optional[str] = None min_num_imaging_sessions: int = Query(default=None, ge=0) min_num_phenotypic_sessions: int = Query(default=None, ge=0) assessment: constr(regex=CONTROLLED_TERM_REGEX) = None @@ -27,6 +27,30 @@ class QueryModel(BaseModel): # TODO: Check back if validating using a regex is too restrictive pipeline_version: constr(regex=VERSION_REGEX) = None + @validator("is_control") + def convert_valid_is_control_values_to_bool(cls, v): + """ + Convert case-insensitive values of 'true' to boolean True, + or raise a validation error for other string values. + + TODO: If/once we migrate the n-API to Pydantic v2, + instead of using this approach to enforce a case-insensitive allowed value "true" and convert it to a bool, + we can use the new BeforeValidator method to do this directly in the query parameter definition. + Context: Enum and Literal both don't work well when we want allowed options of either True (bool) or None, + or even case-insensitive "true" (str) or None, for a FastAPI query param. + As a workaround, we set the type to str and do the validation/conversion afterwards. + See also https://github.com/fastapi/fastapi/discussions/8966 + """ + if v is not None: + # Ensure that the allowed value is case-insensitive + if v.lower() != "true": + raise HTTPException( + status_code=422, + detail="'is_control' must be either set to 'true' or omitted from the query", + ) + return True + return None + @root_validator() def check_maxage_ge_minage(cls, values): """ diff --git a/app/api/utility.py b/app/api/utility.py index 41a4f7e..a9285db 100644 --- a/app/api/utility.py +++ b/app/api/utility.py @@ -190,18 +190,17 @@ def create_query( + f"{create_bound_filter(DIAGNOSIS.var)} && ?{DIAGNOSIS.var} = {diagnosis})." ) - if is_control is not None: - if is_control: - phenotypic_session_level_filters += ( - "\n" - + f"{create_bound_filter(IS_CONTROL.var)} && ?{IS_CONTROL.var} = {IS_CONTROL_TERM})." - ) - else: - # TODO: Revisit - this logic seems odd, since in our current data model the session should not have this edge if it's not a control. - phenotypic_session_level_filters += ( - "\n" - + f"{create_bound_filter(IS_CONTROL.var)} && ?{IS_CONTROL.var} != {IS_CONTROL_TERM})." - ) + # TODO: Simple equivalence to the URI for Healthy Control only works for the condition is_control=True, + # since otherwise the subject node wouldn't be expected to have the property nb:hasSubjectGroup at all. + # If we decide to support queries of is_control = False (i.e., give me all subjects that are *not* controls / have + # at least one diagnosis), we can use something like `FILTER (!BOUND(?{IS_CONTROL.var}))` to + # return only subjects missing the property nb:hasSubjectGroup. + # Related: https://github.com/neurobagel/api/issues/247 + if is_control is True: + phenotypic_session_level_filters += ( + "\n" + + f"{create_bound_filter(IS_CONTROL.var)} && ?{IS_CONTROL.var} = {IS_CONTROL_TERM})." + ) if assessment is not None: phenotypic_session_level_filters += ( diff --git a/tests/test_query.py b/tests/test_query.py index b26f3e2..275fa9b 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -5,6 +5,7 @@ import app.api.utility as util from app.api import crud +from app.api.models import QueryModel ROUTE = "/query" @@ -238,7 +239,7 @@ def test_get_invalid_diagnosis( assert response.status_code == 422 -@pytest.mark.parametrize("valid_iscontrol", [True, False]) +@pytest.mark.parametrize("valid_iscontrol", ["true", "True", "TRUE"]) def test_get_valid_iscontrol( test_app, mock_successful_get, @@ -257,17 +258,32 @@ def test_get_valid_iscontrol( assert response.json() != [] +@pytest.mark.parametrize("valid_iscontrol", ["true", "True", "TRUE"]) +def test_valid_iscontrol_parsed_as_bool(valid_iscontrol): + """Test that valid is_control values do not produce a validation error and are parsed as booleans.""" + + example_query = QueryModel(is_control=valid_iscontrol) + assert example_query.is_control is True + + @pytest.mark.parametrize("mock_get", [None], indirect=True) +@pytest.mark.parametrize("invalid_iscontrol", ["false", "FALSE", "all"]) def test_get_invalid_iscontrol( - test_app, mock_get, monkeypatch, mock_auth_header, set_mock_verify_token + test_app, + mock_get, + monkeypatch, + mock_auth_header, + set_mock_verify_token, + invalid_iscontrol, ): - """Given a non-boolean is_control value, returns a 422 status code.""" + """Given an invalid is_control value, returns a 422 status code and informative error.""" monkeypatch.setattr(crud, "get", mock_get) response = test_app.get( - f"{ROUTE}?is_control=apple", headers=mock_auth_header + f"{ROUTE}?is_control={invalid_iscontrol}", headers=mock_auth_header ) assert response.status_code == 422 + assert "must be either set to 'true' or omitted" in response.text @pytest.mark.parametrize("mock_get", [None], indirect=True)