From b1823eeb4845bef962f5b4b295727dcc59310839 Mon Sep 17 00:00:00 2001 From: Alyssa Dai Date: Thu, 14 Nov 2024 22:36:49 -0500 Subject: [PATCH] [REF] Upgrade codebase to Pydantic>2 (#389) * update extra fields config, regex validation * replace conlist with List and custom AfterValidator * add type annotation to all schemaKey fields - now required to prevent error: pydantic.errors.PydanticUserError: Field 'schemaKey' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation. * use pydantic.RootModel instead of deprecated __root__ * update pydantic model method names * update generate_context to better accommodate changes to Pydantic internals * update URL type annots and notes re: deprecation of str inheriting * remove example portal URL missing TLD - pydantic v2 HttpUrl no longer requires it * shorten test name * update dependencies including pydantic>2 * replace deprecated DataFrame.applymap call * require python>=3.10 for package and test 3.11 * add python versions badge * add README note about pip-compile's Python version awareness * test validate_unique_list() in pydantic model instance * remove seemingly unnecessary Optional type hint * raise explicit exception for non-unique missing value list --------- Co-authored-by: Sebastian Urchs --- .github/workflows/test.yml | 2 +- README.md | 5 +- bagel/dictionary_models.py | 57 +++++++++--- bagel/models.py | 28 +++--- bagel/utilities/model_utils.py | 29 +++--- bagel/utilities/pheno_utils.py | 15 ++-- dev_requirements.txt | 132 +++++++++++++++++----------- requirements.txt | 85 +++++++++++------- setup.cfg | 8 +- tests/integration/test_cli_pheno.py | 4 +- tests/unit/test_model_utils.py | 29 +++++- 11 files changed, 254 insertions(+), 140 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c6b162bd..9ca85d8b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,7 +9,7 @@ jobs: 
strategy: fail-fast: false matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.10", "3.11"] steps: - uses: actions/checkout@v4 diff --git a/README.md b/README.md index 4a74c87e..ea6ac78e 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ [![Coverage Status](https://coveralls.io/repos/github/neurobagel/bagel-cli/badge.svg?branch=main)](https://coveralls.io/github/neurobagel/bagel-cli?branch=main) [![Tests](https://github.com/neurobagel/bagel-cli/actions/workflows/test.yml/badge.svg)](https://github.com/neurobagel/bagel-cli/actions/workflows/test.yml) [![Docker Image Version](https://img.shields.io/docker/v/neurobagel/bagelcli?label=docker)](https://hub.docker.com/r/neurobagel/bagelcli/tags) +[![Python versions](https://img.shields.io/badge/Python-3.10%20%7C%203.11-blue?style=flat)](https://www.python.org) @@ -61,6 +62,8 @@ The `requirements.txt` file is automatically generated from the `setup.cfg` constraints. To update it, we use `pip-compile` from the `pip-tools` package. Here is how you can use these tools to update the `requirements.txt` file. +_Note: `pip-compile` will update dependencies based on the Python version of the environment it's running in._ + 1. Ensure `pip-tools` is installed: ```bash pip install pip-tools @@ -72,7 +75,7 @@ Here is how you can use these tools to update the `requirements.txt` file. 3. The above command only updates the runtime dependencies. 
Now, update the developer dependencies in `dev_requirements.txt`: ```bash - pip-compile -o dev_requirements.txt --extra all + pip-compile -o dev_requirements.txt --extra all --upgrade ``` ## Regenerating the Neurobagel vocabulary file diff --git a/bagel/dictionary_models.py b/bagel/dictionary_models.py index 5cd2fe5e..e205a2fd 100644 --- a/bagel/dictionary_models.py +++ b/bagel/dictionary_models.py @@ -1,6 +1,27 @@ -from typing import Dict, Optional, Union +from typing import Dict, List, Union -from pydantic import BaseModel, Extra, Field, conlist +from pydantic import AfterValidator, BaseModel, ConfigDict, Field, RootModel +from pydantic_core import PydanticCustomError +from typing_extensions import Annotated + + +def validate_unique_list(values: List[str]) -> List[str]: + """ + Check that provided list only has unique elements. + + This custom validator is needed because constrained dtypes and their `unique_items` parameter + were deprecated in Pydantic v2. This function was adapted from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1656228704 + and https://docs.pydantic.dev/latest/concepts/validators/#annotated-validators. 
+ + See also: + - https://docs.pydantic.dev/latest/migration/#changes-to-pydanticfield + - https://docs.pydantic.dev/latest/api/types/#pydantic.types.conlist + """ + if len(values) != len(set(values)): + raise PydanticCustomError( + "unique_list", f"{values} is not a unique list" + ) + return values class Identifier(BaseModel): @@ -27,15 +48,19 @@ class Neurobagel(BaseModel): description="The concept or controlled term that describes this column", alias="IsAbout", ) - missingValues: conlist(str, unique_items=True) = Field( - [], - description="A list of unique values that represent " - "invalid responses, typos, or missing data", - alias="MissingValues", - ) + missingValues: Annotated[ + List[str], + AfterValidator(validate_unique_list), + Field( + [], + description="A list of unique values that represent " + "invalid responses, typos, or missing data", + alias="MissingValues", + json_schema_extra={"uniqueItems": True}, + ), + ] - class Config: - extra = Extra.forbid + model_config = ConfigDict(extra="forbid") class CategoricalNeurobagel(Neurobagel): @@ -66,7 +91,7 @@ class ContinuousNeurobagel(Neurobagel): class IdentifierNeurobagel(Neurobagel): """A Neurobagel annotation for an identifier column""" - identifies: "str" = Field( + identifies: str = Field( ..., description="For identifier columns, the type of observation uniquely identified by this column.", alias="Identifies", @@ -76,7 +101,9 @@ class IdentifierNeurobagel(Neurobagel): class ToolNeurobagel(Neurobagel): """A Neurobagel annotation for an assessment tool column""" - isPartOf: Optional[Identifier] = Field( + # NOTE: Optional[Identifier] was removed as part of https://github.com/neurobagel/bagel-cli/pull/389 + # because we couldn't tell what the Optional was doing + isPartOf: Identifier = Field( ..., description="If the column is a subscale or item of an assessment tool " "then the assessment tool should be specified here.", @@ -123,7 +150,9 @@ class ContinuousColumn(Column): ) -class 
DataDictionary(BaseModel): +class DataDictionary( + RootModel[Dict[str, Union[ContinuousColumn, CategoricalColumn]]] +): """A data dictionary with human and machine readable information for a tabular data file""" - __root__: Dict[str, Union[ContinuousColumn, CategoricalColumn]] + pass diff --git a/bagel/models.py b/bagel/models.py index 129af284..11c91f6a 100644 --- a/bagel/models.py +++ b/bagel/models.py @@ -1,7 +1,7 @@ import uuid from typing import List, Literal, Optional, Union -from pydantic import BaseModel, Extra, Field, HttpUrl +from pydantic import BaseModel, ConfigDict, Field, HttpUrl from bagel.mappings import NB @@ -9,15 +9,17 @@ BAGEL_UUID_PATTERN = rf"^{NB.pf}:{UUID_PATTERN}" -class Bagel(BaseModel, extra=Extra.forbid): +class Bagel(BaseModel): """identifier has to be a valid UUID prepended by the Neurobagel namespace by default, a random (uuid4) string UUID will be created""" identifier: str = Field( - regex=BAGEL_UUID_PATTERN, + pattern=BAGEL_UUID_PATTERN, default_factory=lambda: NB.pf + ":" + str(uuid.uuid4()), ) + model_config = ConfigDict(extra="forbid") + class ControlledTerm(BaseModel): identifier: Union[str, HttpUrl] @@ -25,23 +27,23 @@ class ControlledTerm(BaseModel): class Sex(ControlledTerm): - schemaKey = "Sex" + schemaKey: Literal["Sex"] = "Sex" class Diagnosis(ControlledTerm): - schemaKey = "Diagnosis" + schemaKey: Literal["Diagnosis"] = "Diagnosis" class SubjectGroup(ControlledTerm): - schemaKey = "SubjectGroup" + schemaKey: Literal["SubjectGroup"] = "SubjectGroup" class Assessment(ControlledTerm): - schemaKey = "Assessment" + schemaKey: Literal["Assessment"] = "Assessment" class Image(ControlledTerm): - schemaKey = "Image" + schemaKey: Literal["Image"] = "Image" class Acquisition(Bagel): @@ -50,7 +52,7 @@ class Acquisition(Bagel): class Pipeline(ControlledTerm): - schemaKey = "Pipeline" + schemaKey: Literal["Pipeline"] = "Pipeline" class CompletedPipeline(Bagel): @@ -69,7 +71,7 @@ class PhenotypicSession(Session): isSubjectGroup: 
Optional[SubjectGroup] = None hasDiagnosis: Optional[List[Diagnosis]] = None hasAssessment: Optional[List[Assessment]] = None - schemaKey = "PhenotypicSession" + schemaKey: Literal["PhenotypicSession"] = "PhenotypicSession" class ImagingSession(Session): @@ -77,7 +79,7 @@ class ImagingSession(Session): hasFilePath: Optional[str] = None hasAcquisition: Optional[List[Acquisition]] = None hasCompletedPipeline: Optional[List[CompletedPipeline]] = None - schemaKey = "ImagingSession" + schemaKey: Literal["ImagingSession"] = "ImagingSession" class Subject(Bagel): @@ -88,6 +90,8 @@ class Subject(Bagel): class Dataset(Bagel): hasLabel: str - hasPortalURI: Optional[HttpUrl] = None + # NOTE: Since Pydantic v2, URL types no longer inherit from `str` + # (see https://docs.pydantic.dev/latest/migration/#url-and-dsn-types-in-pydanticnetworks-no-longer-inherit-from-str) + hasPortalURI: Optional[Union[str, HttpUrl]] = None hasSamples: List[Subject] schemaKey: Literal["Dataset"] = "Dataset" diff --git a/bagel/utilities/model_utils.py b/bagel/utilities/model_utils.py index fa47f022..7e609eb5 100644 --- a/bagel/utilities/model_utils.py +++ b/bagel/utilities/model_utils.py @@ -1,3 +1,4 @@ +import inspect from pathlib import Path from typing import Iterable @@ -11,24 +12,22 @@ def generate_context(): - # Direct copy of the dandi-schema context generation function + # Adapted from the dandi-schema context generation function # https://github.com/dandi/dandi-schema/blob/c616d87eaae8869770df0cb5405c24afdb9db096/dandischema/metadata.py field_preamble = { namespace.pf: namespace.url for namespace in ALL_NAMESPACES } fields = {} - for val in dir(models): - klass = getattr(models, val) - if not isinstance(klass, pydantic.main.ModelMetaclass): - continue - fields[klass.__name__] = f"{NB.pf}:{klass.__name__}" - for name, field in klass.__fields__.items(): - if name == "schemaKey": - fields[name] = "@type" - elif name == "identifier": - fields[name] = "@id" - elif name not in fields: - 
fields[name] = {"@id": f"{NB.pf}:{name}"} + for klass_name, klass in inspect.getmembers(models): + if inspect.isclass(klass) and issubclass(klass, pydantic.BaseModel): + fields[klass_name] = f"{NB.pf}:{klass_name}" + for name, field in klass.model_fields.items(): + if name == "schemaKey": + fields[name] = "@type" + elif name == "identifier": + fields[name] = "@id" + elif name not in fields: + fields[name] = {"@id": f"{NB.pf}:{name}"} field_preamble.update(**fields) @@ -41,7 +40,7 @@ def add_context_to_graph_dataset(dataset: models.Dataset) -> dict: # We can't just exclude_unset here because the identifier and schemaKey # for each instance are created as default values and so technically are never set # TODO: we should revisit this because there may be reasons to have None be meaningful in the future - return {**context, **dataset.dict(exclude_none=True)} + return {**context, **dataset.model_dump(exclude_none=True)} def get_subs_missing_from_pheno_data( @@ -80,7 +79,7 @@ def extract_and_validate_jsonld_dataset(file_path: Path) -> models.Dataset: jsonld = file_utils.load_json(file_path) jsonld.pop("@context") try: - jsonld_dataset = models.Dataset.parse_obj(jsonld) + jsonld_dataset = models.Dataset.model_validate(jsonld) except ValidationError as err: typer.echo( typer.style( diff --git a/bagel/utilities/pheno_utils.py b/bagel/utilities/pheno_utils.py index 6b28ddb0..791d98d6 100644 --- a/bagel/utilities/pheno_utils.py +++ b/bagel/utilities/pheno_utils.py @@ -13,7 +13,7 @@ from bagel import dictionary_models, mappings from bagel.mappings import NB -DICTIONARY_SCHEMA = dictionary_models.DataDictionary.schema() +DICTIONARY_SCHEMA = dictionary_models.DataDictionary.model_json_schema() AGE_HEURISTICS = { "float": NB.pf + ":FromFloat", @@ -24,10 +24,13 @@ } -def validate_portal_uri(portal: str) -> Optional[str]: +def validate_portal_uri(portal: Optional[str]) -> Optional[str]: """Custom validation that portal is a valid HttpUrl""" + # NOTE: We need Optional in the 
validation type below to account for --portal being an optional argument in the pheno command try: - pydantic.parse_obj_as(Optional[pydantic.HttpUrl], portal) + pydantic.TypeAdapter(Optional[pydantic.HttpUrl]).validate_python( + portal + ) except pydantic.ValidationError as err: raise BadParameter( "Not a valid http or https URL: " @@ -281,11 +284,7 @@ def get_rows_with_empty_strings(df: pd.DataFrame, columns: list) -> list: """For specified columns, returns the indices of rows with empty strings""" # NOTE: Profile this section if things get slow, transforming "" -> nan and then # using .isna() will very likely be much faster - empty_row = ( - df[columns] - .applymap(lambda cell: cell == "") - .apply(lambda row: any([value for value in row]), axis=1) - ) + empty_row = df[columns].eq("").any(axis=1) return list(empty_row[empty_row].index) diff --git a/dev_requirements.txt b/dev_requirements.txt index cd2dceaf..e7de63f8 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -4,35 +4,45 @@ # # pip-compile --extra=all --output-file=dev_requirements.txt # +annotated-types==0.7.0 + # via pydantic astor==0.8.1 # via formulaic -attrs==23.1.0 +attrs==24.2.0 # via # jsonschema # referencing -bids-validator==1.12.0 +bids-validator==1.14.7.post0 # via pybids -black==24.3.0 +bidsschematools==0.11.3.post2 + # via bids-validator +black==24.10.0 # via flake8-black -build==0.10.0 +build==1.2.2.post1 # via pip-tools cfgv==3.4.0 # via pre-commit click==8.1.7 # via + # bidsschematools # black # pip-tools # pybids # typer -coverage==7.3.0 +colorama==0.4.6 + # via + # build + # click + # pytest +coverage==7.6.4 # via bagel (setup.py) -distlib==0.3.7 +distlib==0.3.9 # via virtualenv docopt==0.6.2 # via num2words -filelock==3.12.2 +filelock==3.16.1 # via virtualenv -flake8==6.1.0 +flake8==7.1.1 # via # bagel (setup.py) # flake8-black @@ -40,19 +50,25 @@ flake8-black==0.3.6 # via bagel (setup.py) formulaic==0.5.2 # via pybids -greenlet==2.0.2 +fsspec==2024.10.0 + # via 
universal-pathlib +greenlet==3.1.1 # via sqlalchemy -identify==2.5.27 +identify==2.6.2 # via pre-commit +importlib-resources==6.4.5 + # via nibabel iniconfig==2.0.0 # via pytest interface-meta==1.3.0 # via formulaic -isodate==0.6.1 +isodate==0.7.2 # via bagel (setup.py) -jsonschema==4.19.0 - # via bagel (setup.py) -jsonschema-specifications==2023.7.1 +jsonschema==4.23.0 + # via + # bagel (setup.py) + # bidsschematools +jsonschema-specifications==2024.10.1 # via jsonschema markdown-it-py==3.0.0 # via rich @@ -62,96 +78,108 @@ mdurl==0.1.2 # via markdown-it-py mypy-extensions==1.0.0 # via black -nibabel==5.1.0 +nibabel==5.3.2 # via pybids -nodeenv==1.8.0 +nodeenv==1.9.1 # via pre-commit -num2words==0.5.12 +num2words==0.5.13 # via pybids -numpy==1.25.2 +numpy==2.1.3 # via # formulaic # nibabel # pandas # pybids # scipy -packaging==23.1 +packaging==24.2 # via # black # build # nibabel # pytest -pandas==2.0.3 +pandas==2.2.3 # via # formulaic # pybids -pathspec==0.11.2 +pathspec==0.12.1 # via black -pip-tools==7.3.0 +pip-tools==7.4.1 # via bagel (setup.py) -platformdirs==3.10.0 +platformdirs==4.3.6 # via # black # virtualenv -pluggy==1.2.0 +pluggy==1.5.0 # via pytest -pre-commit==3.3.3 +pre-commit==4.0.1 # via bagel (setup.py) -pybids==0.16.3 +pybids==0.17.2 # via bagel (setup.py) -pycodestyle==2.11.0 +pycodestyle==2.12.1 # via flake8 -pydantic==1.10.13 +pydantic==2.9.2 # via bagel (setup.py) -pyflakes==3.1.0 +pydantic-core==2.23.4 + # via pydantic +pyflakes==3.2.0 # via flake8 -pygments==2.16.1 +pygments==2.18.0 # via rich -pyproject-hooks==1.0.0 - # via build -pytest==7.4.0 +pyproject-hooks==1.2.0 + # via + # build + # pip-tools +pytest==8.3.3 # via bagel (setup.py) -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via pandas -pytz==2023.3 +pytz==2024.2 # via pandas -pyyaml==6.0.1 - # via pre-commit -referencing==0.30.2 +pyyaml==6.0.2 + # via + # bidsschematools + # pre-commit +referencing==0.35.1 # via # jsonschema # jsonschema-specifications -rich==13.5.2 - # 
via bagel (setup.py) -rpds-py==0.9.2 +rich==13.9.4 + # via + # bagel (setup.py) + # typer +rpds-py==0.21.0 # via # jsonschema # referencing -scipy==1.11.2 +scipy==1.14.1 # via # formulaic # pybids +shellingham==1.5.4 + # via typer six==1.16.0 - # via - # isodate - # python-dateutil -sqlalchemy==2.0.20 + # via python-dateutil +sqlalchemy==2.0.36 # via pybids -typer==0.9.0 +typer==0.13.0 # via bagel (setup.py) -typing-extensions==4.7.1 +typing-extensions==4.12.2 # via # formulaic + # nibabel # pydantic + # pydantic-core # sqlalchemy # typer -tzdata==2023.3 +tzdata==2024.2 # via pandas -virtualenv==20.24.3 +universal-pathlib==0.2.5 + # via pybids +virtualenv==20.27.1 # via pre-commit -wheel==0.41.2 +wheel==0.45.0 # via pip-tools -wrapt==1.15.0 +wrapt==1.16.0 # via formulaic # The following packages are considered to be unsafe in a requirements file: diff --git a/requirements.txt b/requirements.txt index dae9b6fb..08fa3aea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,94 +2,117 @@ # This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile +# pip-compile --output-file=requirements.txt # +annotated-types==0.7.0 + # via pydantic astor==0.8.1 # via formulaic -attrs==23.1.0 +attrs==24.2.0 # via # jsonschema # referencing -bids-validator==1.12.0 +bids-validator==1.14.7.post0 # via pybids +bidsschematools==0.11.3.post2 + # via bids-validator click==8.1.7 # via + # bidsschematools # pybids # typer +colorama==0.4.6 + # via click docopt==0.6.2 # via num2words formulaic==0.5.2 # via pybids -greenlet==2.0.2 +fsspec==2024.10.0 + # via universal-pathlib +greenlet==3.1.1 # via sqlalchemy +importlib-resources==6.4.5 + # via nibabel interface-meta==1.3.0 # via formulaic -isodate==0.6.1 - # via bagel (setup.py) -jsonschema==4.19.0 +isodate==0.7.2 # via bagel (setup.py) -jsonschema-specifications==2023.7.1 +jsonschema==4.23.0 + # via + # bagel (setup.py) + # bidsschematools +jsonschema-specifications==2024.10.1 # via jsonschema 
markdown-it-py==3.0.0 # via rich mdurl==0.1.2 # via markdown-it-py -nibabel==5.1.0 +nibabel==5.3.2 # via pybids -num2words==0.5.12 +num2words==0.5.13 # via pybids -numpy==1.25.2 +numpy==2.1.3 # via # formulaic # nibabel # pandas # pybids # scipy -packaging==23.1 +packaging==24.2 # via nibabel -pandas==2.0.3 +pandas==2.2.3 # via # formulaic # pybids -pybids==0.16.3 +pybids==0.17.2 # via bagel (setup.py) -pydantic==1.10.13 +pydantic==2.9.2 # via bagel (setup.py) -pygments==2.16.1 +pydantic-core==2.23.4 + # via pydantic +pygments==2.18.0 # via rich -python-dateutil==2.8.2 +python-dateutil==2.9.0.post0 # via pandas -pytz==2023.3 +pytz==2024.2 # via pandas -referencing==0.30.2 +pyyaml==6.0.2 + # via bidsschematools +referencing==0.35.1 # via # jsonschema # jsonschema-specifications -rich==13.5.2 - # via bagel (setup.py) -rpds-py==0.9.2 +rich==13.9.4 + # via + # bagel (setup.py) + # typer +rpds-py==0.21.0 # via # jsonschema # referencing -scipy==1.11.2 +scipy==1.14.1 # via # formulaic # pybids +shellingham==1.5.4 + # via typer six==1.16.0 - # via - # isodate - # python-dateutil -sqlalchemy==2.0.20 + # via python-dateutil +sqlalchemy==2.0.36 # via pybids -typer==0.9.0 +typer==0.13.0 # via bagel (setup.py) -typing-extensions==4.7.1 +typing-extensions==4.12.2 # via # formulaic + # nibabel # pydantic + # pydantic-core # sqlalchemy # typer -tzdata==2023.3 +tzdata==2024.2 # via pandas -wrapt==1.15.0 +universal-pathlib==0.2.5 + # via pybids +wrapt==1.16.0 # via formulaic diff --git a/setup.cfg b/setup.cfg index 00c575a6..eece2fe4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,6 @@ +# NOTE: bagel will currently always be installed as version 0.0.0 when using pip [metadata] +name = bagel license = MIT license_files = LICENSE author = neurobagel developers @@ -13,16 +15,16 @@ classifiers = Development Status :: 1 - Planning Intended Audience :: Science/Research License :: OSI Approved :: MIT License - Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 
+ Programming Language :: Python :: 3.11 [options] -python_requires = >= 3.9 +python_requires = >= 3.10 install_requires = pybids typer rich - pydantic<2 + pydantic jsonschema isodate zip_safe = False diff --git a/tests/integration/test_cli_pheno.py b/tests/integration/test_cli_pheno.py index aa257fce..8bb60891 100644 --- a/tests/integration/test_cli_pheno.py +++ b/tests/integration/test_cli_pheno.py @@ -149,10 +149,10 @@ def test_invalid_inputs_are_handled_gracefully( @pytest.mark.parametrize( + # See also https://docs.pydantic.dev/latest/api/networks/#pydantic.networks.HttpUrl for v2 URL requirements "portal", [ "openneuro.org/datasets/ds002080", - "https://openneuro", "not a url", "www.github.com/mycoolrepo/mycooldataset", ], @@ -356,7 +356,7 @@ def test_providing_csv_file_raises_error( assert "Please provide a valid .tsv phenotypic file" in str(e.value) -def test_that_output_file_contains_dataset_level_attributes( +def test_output_file_contains_dataset_level_attributes( runner, test_data, default_pheno_output_path, load_test_json ): runner.invoke( diff --git a/tests/unit/test_model_utils.py b/tests/unit/test_model_utils.py index 41274eeb..010ef60c 100644 --- a/tests/unit/test_model_utils.py +++ b/tests/unit/test_model_utils.py @@ -1,6 +1,9 @@ +from contextlib import nullcontext as does_not_raise + import pytest +from pydantic import ValidationError -from bagel import mappings, models +from bagel import dictionary_models, mappings, models from bagel.utilities import model_utils @@ -30,6 +33,30 @@ def _find_by_key(data, target): return _find_by_key +@pytest.mark.parametrize( + "missing_values,expectation", + [ + (["", "999"], does_not_raise()), + ( + ["", "999", "999"], + pytest.raises(ValidationError, match="not a unique list"), + ), + ], +) +def test_unique_missing_values_validation(missing_values, expectation): + """ + Test that validate_unique_list() correctly validates a list of missing values in a data dictionary column instance. 
+ """ + with expectation: + dictionary_models.Neurobagel( + IsAbout={ + "TermURL": "nb:Sex", + "Label": "Sex", + }, + MissingValues=missing_values, + ) + + def test_all_used_namespaces_have_urls( get_test_context, get_values_by_key, load_test_json, test_data_upload_path ):