From b1823eeb4845bef962f5b4b295727dcc59310839 Mon Sep 17 00:00:00 2001
From: Alyssa Dai <alyssa.ydai@gmail.com>
Date: Thu, 14 Nov 2024 22:36:49 -0500
Subject: [PATCH] [REF] Upgrade codebase to Pydantic>2 (#389)

* update extra fields config, regex validation

* replace conlist with List and custom AfterValidator

* add type annotation to all schemaKey fields
- now required to prevent error: pydantic.errors.PydanticUserError: Field 'schemaKey' defined on a base class was overridden by a non-annotated attribute. All field definitions, including overrides, require a type annotation.

* use pydantic.RootModel instead of deprecated __root__

* update pydantic model method names

* update generate_context to better accommodate changes to Pydantic internals

* update URL type annotations and add notes on URL types no longer inheriting from str

* remove example portal URL missing TLD
- pydantic v2 HttpUrl no longer requires it

* shorten test name

* update dependencies including pydantic>2

* replace deprecated DataFrame.applymap call

* require python>=3.10 for package and test 3.11

* add python versions badge

* add README note about pip-compile's Python version awareness

* test validate_unique_list() in pydantic model instance

* remove seemingly unnecessary Optional type hint

* raise explicit exception for non-unique missing value list

---------

Co-authored-by: Sebastian Urchs <surchs@users.noreply.github.com>
---
 .github/workflows/test.yml          |   2 +-
 README.md                           |   5 +-
 bagel/dictionary_models.py          |  57 +++++++++---
 bagel/models.py                     |  28 +++---
 bagel/utilities/model_utils.py      |  29 +++---
 bagel/utilities/pheno_utils.py      |  15 ++--
 dev_requirements.txt                | 132 +++++++++++++++++-----------
 requirements.txt                    |  85 +++++++++++-------
 setup.cfg                           |   8 +-
 tests/integration/test_cli_pheno.py |   4 +-
 tests/unit/test_model_utils.py      |  29 +++++-
 11 files changed, 254 insertions(+), 140 deletions(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c6b162bd..9ca85d8b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,7 +9,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10"]
+        python-version: ["3.10", "3.11"]
 
     steps:
     - uses: actions/checkout@v4
diff --git a/README.md b/README.md
index 4a74c87e..ea6ac78e 100644
--- a/README.md
+++ b/README.md
@@ -4,6 +4,7 @@
 [![Coverage Status](https://coveralls.io/repos/github/neurobagel/bagel-cli/badge.svg?branch=main)](https://coveralls.io/github/neurobagel/bagel-cli?branch=main)
 [![Tests](https://github.com/neurobagel/bagel-cli/actions/workflows/test.yml/badge.svg)](https://github.com/neurobagel/bagel-cli/actions/workflows/test.yml)
 [![Docker Image Version](https://img.shields.io/docker/v/neurobagel/bagelcli?label=docker)](https://hub.docker.com/r/neurobagel/bagelcli/tags)
+[![Python versions](https://img.shields.io/badge/Python-3.10%20%7C%203.11-blue?style=flat)](https://www.python.org)
 
 </div>
 
@@ -61,6 +62,8 @@ The `requirements.txt` file is automatically generated from the `setup.cfg`
 constraints. To update it, we use `pip-compile` from the `pip-tools` package.
 Here is how you can use these tools to update the `requirements.txt` file.
 
+_Note: `pip-compile` will update dependencies based on the Python version of the environment it's running in._
+
 1. Ensure `pip-tools` is installed:
     ```bash
     pip install pip-tools
@@ -72,7 +75,7 @@ Here is how you can use these tools to update the `requirements.txt` file.
 3. The above command only updates the runtime dependencies.
 Now, update the developer dependencies in `dev_requirements.txt`:
     ```bash
-    pip-compile -o dev_requirements.txt --extra all
+    pip-compile -o dev_requirements.txt --extra all --upgrade
     ```
 
 ## Regenerating the Neurobagel vocabulary file
diff --git a/bagel/dictionary_models.py b/bagel/dictionary_models.py
index 5cd2fe5e..e205a2fd 100644
--- a/bagel/dictionary_models.py
+++ b/bagel/dictionary_models.py
@@ -1,6 +1,27 @@
-from typing import Dict, Optional, Union
+from typing import Dict, List, Union
 
-from pydantic import BaseModel, Extra, Field, conlist
+from pydantic import AfterValidator, BaseModel, ConfigDict, Field, RootModel
+from pydantic_core import PydanticCustomError
+from typing_extensions import Annotated
+
+
+def validate_unique_list(values: List[str]) -> List[str]:
+    """
+    Check that provided list only has unique elements.
+
+    This custom validator is needed because constrained dtypes and their `unique_items` parameter
+    were deprecated in Pydantic v2. This function was adapted from https://github.com/pydantic/pydantic-core/pull/820#issuecomment-1656228704
+    and https://docs.pydantic.dev/latest/concepts/validators/#annotated-validators.
+
+    See also:
+    - https://docs.pydantic.dev/latest/migration/#changes-to-pydanticfield
+    - https://docs.pydantic.dev/latest/api/types/#pydantic.types.conlist
+    """
+    if len(values) != len(set(values)):
+        raise PydanticCustomError(
+            "unique_list", f"{values} is not a unique list"
+        )
+    return values
 
 
 class Identifier(BaseModel):
@@ -27,15 +48,19 @@ class Neurobagel(BaseModel):
         description="The concept or controlled term that describes this column",
         alias="IsAbout",
     )
-    missingValues: conlist(str, unique_items=True) = Field(
-        [],
-        description="A list of unique values that represent "
-        "invalid responses, typos, or missing data",
-        alias="MissingValues",
-    )
+    missingValues: Annotated[
+        List[str],
+        AfterValidator(validate_unique_list),
+        Field(
+            [],
+            description="A list of unique values that represent "
+            "invalid responses, typos, or missing data",
+            alias="MissingValues",
+            json_schema_extra={"uniqueItems": True},
+        ),
+    ]
 
-    class Config:
-        extra = Extra.forbid
+    model_config = ConfigDict(extra="forbid")
 
 
 class CategoricalNeurobagel(Neurobagel):
@@ -66,7 +91,7 @@ class ContinuousNeurobagel(Neurobagel):
 class IdentifierNeurobagel(Neurobagel):
     """A Neurobagel annotation for an identifier column"""
 
-    identifies: "str" = Field(
+    identifies: str = Field(
         ...,
         description="For identifier columns, the type of observation uniquely identified by this column.",
         alias="Identifies",
@@ -76,7 +101,9 @@ class IdentifierNeurobagel(Neurobagel):
 class ToolNeurobagel(Neurobagel):
     """A Neurobagel annotation for an assessment tool column"""
 
-    isPartOf: Optional[Identifier] = Field(
+    # NOTE: Optional[Identifier] was removed as part of https://github.com/neurobagel/bagel-cli/pull/389
+    # because we couldn't tell what the Optional was doing
+    isPartOf: Identifier = Field(
         ...,
         description="If the column is a subscale or item of an assessment tool "
         "then the assessment tool should be specified here.",
@@ -123,7 +150,9 @@ class ContinuousColumn(Column):
     )
 
 
-class DataDictionary(BaseModel):
+class DataDictionary(
+    RootModel[Dict[str, Union[ContinuousColumn, CategoricalColumn]]]
+):
     """A data dictionary with human and machine readable information for a tabular data file"""
 
-    __root__: Dict[str, Union[ContinuousColumn, CategoricalColumn]]
+    pass
diff --git a/bagel/models.py b/bagel/models.py
index 129af284..11c91f6a 100644
--- a/bagel/models.py
+++ b/bagel/models.py
@@ -1,7 +1,7 @@
 import uuid
 from typing import List, Literal, Optional, Union
 
-from pydantic import BaseModel, Extra, Field, HttpUrl
+from pydantic import BaseModel, ConfigDict, Field, HttpUrl
 
 from bagel.mappings import NB
 
@@ -9,15 +9,17 @@
 BAGEL_UUID_PATTERN = rf"^{NB.pf}:{UUID_PATTERN}"
 
 
-class Bagel(BaseModel, extra=Extra.forbid):
+class Bagel(BaseModel):
     """identifier has to be a valid UUID prepended by the Neurobagel namespace
     by default, a random (uuid4) string UUID will be created"""
 
     identifier: str = Field(
-        regex=BAGEL_UUID_PATTERN,
+        pattern=BAGEL_UUID_PATTERN,
         default_factory=lambda: NB.pf + ":" + str(uuid.uuid4()),
     )
 
+    model_config = ConfigDict(extra="forbid")
+
 
 class ControlledTerm(BaseModel):
     identifier: Union[str, HttpUrl]
@@ -25,23 +27,23 @@ class ControlledTerm(BaseModel):
 
 
 class Sex(ControlledTerm):
-    schemaKey = "Sex"
+    schemaKey: Literal["Sex"] = "Sex"
 
 
 class Diagnosis(ControlledTerm):
-    schemaKey = "Diagnosis"
+    schemaKey: Literal["Diagnosis"] = "Diagnosis"
 
 
 class SubjectGroup(ControlledTerm):
-    schemaKey = "SubjectGroup"
+    schemaKey: Literal["SubjectGroup"] = "SubjectGroup"
 
 
 class Assessment(ControlledTerm):
-    schemaKey = "Assessment"
+    schemaKey: Literal["Assessment"] = "Assessment"
 
 
 class Image(ControlledTerm):
-    schemaKey = "Image"
+    schemaKey: Literal["Image"] = "Image"
 
 
 class Acquisition(Bagel):
@@ -50,7 +52,7 @@ class Acquisition(Bagel):
 
 
 class Pipeline(ControlledTerm):
-    schemaKey = "Pipeline"
+    schemaKey: Literal["Pipeline"] = "Pipeline"
 
 
 class CompletedPipeline(Bagel):
@@ -69,7 +71,7 @@ class PhenotypicSession(Session):
     isSubjectGroup: Optional[SubjectGroup] = None
     hasDiagnosis: Optional[List[Diagnosis]] = None
     hasAssessment: Optional[List[Assessment]] = None
-    schemaKey = "PhenotypicSession"
+    schemaKey: Literal["PhenotypicSession"] = "PhenotypicSession"
 
 
 class ImagingSession(Session):
@@ -77,7 +79,7 @@ class ImagingSession(Session):
     hasFilePath: Optional[str] = None
     hasAcquisition: Optional[List[Acquisition]] = None
     hasCompletedPipeline: Optional[List[CompletedPipeline]] = None
-    schemaKey = "ImagingSession"
+    schemaKey: Literal["ImagingSession"] = "ImagingSession"
 
 
 class Subject(Bagel):
@@ -88,6 +90,8 @@ class Subject(Bagel):
 
 class Dataset(Bagel):
     hasLabel: str
-    hasPortalURI: Optional[HttpUrl] = None
+    # NOTE: Since Pydantic v2, URL types no longer inherit from `str`
+    # (see https://docs.pydantic.dev/latest/migration/#url-and-dsn-types-in-pydanticnetworks-no-longer-inherit-from-str)
+    hasPortalURI: Optional[Union[str, HttpUrl]] = None
     hasSamples: List[Subject]
     schemaKey: Literal["Dataset"] = "Dataset"
diff --git a/bagel/utilities/model_utils.py b/bagel/utilities/model_utils.py
index fa47f022..7e609eb5 100644
--- a/bagel/utilities/model_utils.py
+++ b/bagel/utilities/model_utils.py
@@ -1,3 +1,4 @@
+import inspect
 from pathlib import Path
 from typing import Iterable
 
@@ -11,24 +12,22 @@
 
 
 def generate_context():
-    # Direct copy of the dandi-schema context generation function
+    # Adapted from the dandi-schema context generation function
     # https://github.com/dandi/dandi-schema/blob/c616d87eaae8869770df0cb5405c24afdb9db096/dandischema/metadata.py
     field_preamble = {
         namespace.pf: namespace.url for namespace in ALL_NAMESPACES
     }
     fields = {}
-    for val in dir(models):
-        klass = getattr(models, val)
-        if not isinstance(klass, pydantic.main.ModelMetaclass):
-            continue
-        fields[klass.__name__] = f"{NB.pf}:{klass.__name__}"
-        for name, field in klass.__fields__.items():
-            if name == "schemaKey":
-                fields[name] = "@type"
-            elif name == "identifier":
-                fields[name] = "@id"
-            elif name not in fields:
-                fields[name] = {"@id": f"{NB.pf}:{name}"}
+    for klass_name, klass in inspect.getmembers(models):
+        if inspect.isclass(klass) and issubclass(klass, pydantic.BaseModel):
+            fields[klass_name] = f"{NB.pf}:{klass_name}"
+            for name, field in klass.model_fields.items():
+                if name == "schemaKey":
+                    fields[name] = "@type"
+                elif name == "identifier":
+                    fields[name] = "@id"
+                elif name not in fields:
+                    fields[name] = {"@id": f"{NB.pf}:{name}"}
 
     field_preamble.update(**fields)
 
@@ -41,7 +40,7 @@ def add_context_to_graph_dataset(dataset: models.Dataset) -> dict:
     # We can't just exclude_unset here because the identifier and schemaKey
     # for each instance are created as default values and so technically are never set
     # TODO: we should revisit this because there may be reasons to have None be meaningful in the future
-    return {**context, **dataset.dict(exclude_none=True)}
+    return {**context, **dataset.model_dump(exclude_none=True)}
 
 
 def get_subs_missing_from_pheno_data(
@@ -80,7 +79,7 @@ def extract_and_validate_jsonld_dataset(file_path: Path) -> models.Dataset:
     jsonld = file_utils.load_json(file_path)
     jsonld.pop("@context")
     try:
-        jsonld_dataset = models.Dataset.parse_obj(jsonld)
+        jsonld_dataset = models.Dataset.model_validate(jsonld)
     except ValidationError as err:
         typer.echo(
             typer.style(
diff --git a/bagel/utilities/pheno_utils.py b/bagel/utilities/pheno_utils.py
index 6b28ddb0..791d98d6 100644
--- a/bagel/utilities/pheno_utils.py
+++ b/bagel/utilities/pheno_utils.py
@@ -13,7 +13,7 @@
 from bagel import dictionary_models, mappings
 from bagel.mappings import NB
 
-DICTIONARY_SCHEMA = dictionary_models.DataDictionary.schema()
+DICTIONARY_SCHEMA = dictionary_models.DataDictionary.model_json_schema()
 
 AGE_HEURISTICS = {
     "float": NB.pf + ":FromFloat",
@@ -24,10 +24,13 @@
 }
 
 
-def validate_portal_uri(portal: str) -> Optional[str]:
+def validate_portal_uri(portal: Optional[str]) -> Optional[str]:
     """Custom validation that portal is a valid HttpUrl"""
+    # NOTE: We need Optional in the validation type below to account for --portal being an optional argument in the pheno command
     try:
-        pydantic.parse_obj_as(Optional[pydantic.HttpUrl], portal)
+        pydantic.TypeAdapter(Optional[pydantic.HttpUrl]).validate_python(
+            portal
+        )
     except pydantic.ValidationError as err:
         raise BadParameter(
             "Not a valid http or https URL: "
@@ -281,11 +284,7 @@ def get_rows_with_empty_strings(df: pd.DataFrame, columns: list) -> list:
     """For specified columns, returns the indices of rows with empty strings"""
     # NOTE: Profile this section if things get slow, transforming "" -> nan and then
     # using .isna() will very likely be much faster
-    empty_row = (
-        df[columns]
-        .applymap(lambda cell: cell == "")
-        .apply(lambda row: any([value for value in row]), axis=1)
-    )
+    empty_row = df[columns].eq("").any(axis=1)
     return list(empty_row[empty_row].index)
 
 
diff --git a/dev_requirements.txt b/dev_requirements.txt
index cd2dceaf..e7de63f8 100644
--- a/dev_requirements.txt
+++ b/dev_requirements.txt
@@ -4,35 +4,45 @@
 #
 #    pip-compile --extra=all --output-file=dev_requirements.txt
 #
+annotated-types==0.7.0
+    # via pydantic
 astor==0.8.1
     # via formulaic
-attrs==23.1.0
+attrs==24.2.0
     # via
     #   jsonschema
     #   referencing
-bids-validator==1.12.0
+bids-validator==1.14.7.post0
     # via pybids
-black==24.3.0
+bidsschematools==0.11.3.post2
+    # via bids-validator
+black==24.10.0
     # via flake8-black
-build==0.10.0
+build==1.2.2.post1
     # via pip-tools
 cfgv==3.4.0
     # via pre-commit
 click==8.1.7
     # via
+    #   bidsschematools
     #   black
     #   pip-tools
     #   pybids
     #   typer
-coverage==7.3.0
+colorama==0.4.6
+    # via
+    #   build
+    #   click
+    #   pytest
+coverage==7.6.4
     # via bagel (setup.py)
-distlib==0.3.7
+distlib==0.3.9
     # via virtualenv
 docopt==0.6.2
     # via num2words
-filelock==3.12.2
+filelock==3.16.1
     # via virtualenv
-flake8==6.1.0
+flake8==7.1.1
     # via
     #   bagel (setup.py)
     #   flake8-black
@@ -40,19 +50,25 @@ flake8-black==0.3.6
     # via bagel (setup.py)
 formulaic==0.5.2
     # via pybids
-greenlet==2.0.2
+fsspec==2024.10.0
+    # via universal-pathlib
+greenlet==3.1.1
     # via sqlalchemy
-identify==2.5.27
+identify==2.6.2
     # via pre-commit
+importlib-resources==6.4.5
+    # via nibabel
 iniconfig==2.0.0
     # via pytest
 interface-meta==1.3.0
     # via formulaic
-isodate==0.6.1
+isodate==0.7.2
     # via bagel (setup.py)
-jsonschema==4.19.0
-    # via bagel (setup.py)
-jsonschema-specifications==2023.7.1
+jsonschema==4.23.0
+    # via
+    #   bagel (setup.py)
+    #   bidsschematools
+jsonschema-specifications==2024.10.1
     # via jsonschema
 markdown-it-py==3.0.0
     # via rich
@@ -62,96 +78,108 @@ mdurl==0.1.2
     # via markdown-it-py
 mypy-extensions==1.0.0
     # via black
-nibabel==5.1.0
+nibabel==5.3.2
     # via pybids
-nodeenv==1.8.0
+nodeenv==1.9.1
     # via pre-commit
-num2words==0.5.12
+num2words==0.5.13
     # via pybids
-numpy==1.25.2
+numpy==2.1.3
     # via
     #   formulaic
     #   nibabel
     #   pandas
     #   pybids
     #   scipy
-packaging==23.1
+packaging==24.2
     # via
     #   black
     #   build
     #   nibabel
     #   pytest
-pandas==2.0.3
+pandas==2.2.3
     # via
     #   formulaic
     #   pybids
-pathspec==0.11.2
+pathspec==0.12.1
     # via black
-pip-tools==7.3.0
+pip-tools==7.4.1
     # via bagel (setup.py)
-platformdirs==3.10.0
+platformdirs==4.3.6
     # via
     #   black
     #   virtualenv
-pluggy==1.2.0
+pluggy==1.5.0
     # via pytest
-pre-commit==3.3.3
+pre-commit==4.0.1
     # via bagel (setup.py)
-pybids==0.16.3
+pybids==0.17.2
     # via bagel (setup.py)
-pycodestyle==2.11.0
+pycodestyle==2.12.1
     # via flake8
-pydantic==1.10.13
+pydantic==2.9.2
     # via bagel (setup.py)
-pyflakes==3.1.0
+pydantic-core==2.23.4
+    # via pydantic
+pyflakes==3.2.0
     # via flake8
-pygments==2.16.1
+pygments==2.18.0
     # via rich
-pyproject-hooks==1.0.0
-    # via build
-pytest==7.4.0
+pyproject-hooks==1.2.0
+    # via
+    #   build
+    #   pip-tools
+pytest==8.3.3
     # via bagel (setup.py)
-python-dateutil==2.8.2
+python-dateutil==2.9.0.post0
     # via pandas
-pytz==2023.3
+pytz==2024.2
     # via pandas
-pyyaml==6.0.1
-    # via pre-commit
-referencing==0.30.2
+pyyaml==6.0.2
+    # via
+    #   bidsschematools
+    #   pre-commit
+referencing==0.35.1
     # via
     #   jsonschema
     #   jsonschema-specifications
-rich==13.5.2
-    # via bagel (setup.py)
-rpds-py==0.9.2
+rich==13.9.4
+    # via
+    #   bagel (setup.py)
+    #   typer
+rpds-py==0.21.0
     # via
     #   jsonschema
     #   referencing
-scipy==1.11.2
+scipy==1.14.1
     # via
     #   formulaic
     #   pybids
+shellingham==1.5.4
+    # via typer
 six==1.16.0
-    # via
-    #   isodate
-    #   python-dateutil
-sqlalchemy==2.0.20
+    # via python-dateutil
+sqlalchemy==2.0.36
     # via pybids
-typer==0.9.0
+typer==0.13.0
     # via bagel (setup.py)
-typing-extensions==4.7.1
+typing-extensions==4.12.2
     # via
     #   formulaic
+    #   nibabel
     #   pydantic
+    #   pydantic-core
     #   sqlalchemy
     #   typer
-tzdata==2023.3
+tzdata==2024.2
     # via pandas
-virtualenv==20.24.3
+universal-pathlib==0.2.5
+    # via pybids
+virtualenv==20.27.1
     # via pre-commit
-wheel==0.41.2
+wheel==0.45.0
     # via pip-tools
-wrapt==1.15.0
+wrapt==1.16.0
     # via formulaic
 
 # The following packages are considered to be unsafe in a requirements file:
diff --git a/requirements.txt b/requirements.txt
index dae9b6fb..08fa3aea 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,94 +2,117 @@
 # This file is autogenerated by pip-compile with Python 3.11
 # by the following command:
 #
-#    pip-compile
+#    pip-compile --output-file=requirements.txt
 #
+annotated-types==0.7.0
+    # via pydantic
 astor==0.8.1
     # via formulaic
-attrs==23.1.0
+attrs==24.2.0
     # via
     #   jsonschema
     #   referencing
-bids-validator==1.12.0
+bids-validator==1.14.7.post0
     # via pybids
+bidsschematools==0.11.3.post2
+    # via bids-validator
 click==8.1.7
     # via
+    #   bidsschematools
     #   pybids
     #   typer
+colorama==0.4.6
+    # via click
 docopt==0.6.2
     # via num2words
 formulaic==0.5.2
     # via pybids
-greenlet==2.0.2
+fsspec==2024.10.0
+    # via universal-pathlib
+greenlet==3.1.1
     # via sqlalchemy
+importlib-resources==6.4.5
+    # via nibabel
 interface-meta==1.3.0
     # via formulaic
-isodate==0.6.1
-    # via bagel (setup.py)
-jsonschema==4.19.0
+isodate==0.7.2
     # via bagel (setup.py)
-jsonschema-specifications==2023.7.1
+jsonschema==4.23.0
+    # via
+    #   bagel (setup.py)
+    #   bidsschematools
+jsonschema-specifications==2024.10.1
     # via jsonschema
 markdown-it-py==3.0.0
     # via rich
 mdurl==0.1.2
     # via markdown-it-py
-nibabel==5.1.0
+nibabel==5.3.2
     # via pybids
-num2words==0.5.12
+num2words==0.5.13
     # via pybids
-numpy==1.25.2
+numpy==2.1.3
     # via
     #   formulaic
     #   nibabel
     #   pandas
     #   pybids
     #   scipy
-packaging==23.1
+packaging==24.2
     # via nibabel
-pandas==2.0.3
+pandas==2.2.3
     # via
     #   formulaic
     #   pybids
-pybids==0.16.3
+pybids==0.17.2
     # via bagel (setup.py)
-pydantic==1.10.13
+pydantic==2.9.2
     # via bagel (setup.py)
-pygments==2.16.1
+pydantic-core==2.23.4
+    # via pydantic
+pygments==2.18.0
     # via rich
-python-dateutil==2.8.2
+python-dateutil==2.9.0.post0
     # via pandas
-pytz==2023.3
+pytz==2024.2
     # via pandas
-referencing==0.30.2
+pyyaml==6.0.2
+    # via bidsschematools
+referencing==0.35.1
     # via
     #   jsonschema
     #   jsonschema-specifications
-rich==13.5.2
-    # via bagel (setup.py)
-rpds-py==0.9.2
+rich==13.9.4
+    # via
+    #   bagel (setup.py)
+    #   typer
+rpds-py==0.21.0
     # via
     #   jsonschema
     #   referencing
-scipy==1.11.2
+scipy==1.14.1
     # via
     #   formulaic
     #   pybids
+shellingham==1.5.4
+    # via typer
 six==1.16.0
-    # via
-    #   isodate
-    #   python-dateutil
-sqlalchemy==2.0.20
+    # via python-dateutil
+sqlalchemy==2.0.36
     # via pybids
-typer==0.9.0
+typer==0.13.0
     # via bagel (setup.py)
-typing-extensions==4.7.1
+typing-extensions==4.12.2
     # via
     #   formulaic
+    #   nibabel
     #   pydantic
+    #   pydantic-core
     #   sqlalchemy
     #   typer
-tzdata==2023.3
+tzdata==2024.2
     # via pandas
-wrapt==1.15.0
+universal-pathlib==0.2.5
+    # via pybids
+wrapt==1.16.0
     # via formulaic
diff --git a/setup.cfg b/setup.cfg
index 00c575a6..eece2fe4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,4 +1,6 @@
+# NOTE: bagel will currently always be installed as version 0.0.0 when using pip
 [metadata]
+name = bagel
 license = MIT
 license_files = LICENSE
 author = neurobagel developers
@@ -13,16 +15,16 @@ classifiers =
     Development Status :: 1 - Planning
     Intended Audience :: Science/Research
     License :: OSI Approved :: MIT License
-    Programming Language :: Python :: 3.9
     Programming Language :: Python :: 3.10
+    Programming Language :: Python :: 3.11
 
 [options]
-python_requires = >= 3.9
+python_requires = >= 3.10
 install_requires =
     pybids
     typer
     rich
-    pydantic<2
+    pydantic
     jsonschema
     isodate
 zip_safe = False
diff --git a/tests/integration/test_cli_pheno.py b/tests/integration/test_cli_pheno.py
index aa257fce..8bb60891 100644
--- a/tests/integration/test_cli_pheno.py
+++ b/tests/integration/test_cli_pheno.py
@@ -149,10 +149,10 @@ def test_invalid_inputs_are_handled_gracefully(
 
 
 @pytest.mark.parametrize(
+    # See also https://docs.pydantic.dev/latest/api/networks/#pydantic.networks.HttpUrl for v2 URL requirements
     "portal",
     [
         "openneuro.org/datasets/ds002080",
-        "https://openneuro",
         "not a url",
         "www.github.com/mycoolrepo/mycooldataset",
     ],
@@ -356,7 +356,7 @@ def test_providing_csv_file_raises_error(
     assert "Please provide a valid .tsv phenotypic file" in str(e.value)
 
 
-def test_that_output_file_contains_dataset_level_attributes(
+def test_output_file_contains_dataset_level_attributes(
     runner, test_data, default_pheno_output_path, load_test_json
 ):
     runner.invoke(
diff --git a/tests/unit/test_model_utils.py b/tests/unit/test_model_utils.py
index 41274eeb..010ef60c 100644
--- a/tests/unit/test_model_utils.py
+++ b/tests/unit/test_model_utils.py
@@ -1,6 +1,9 @@
+from contextlib import nullcontext as does_not_raise
+
 import pytest
+from pydantic import ValidationError
 
-from bagel import mappings, models
+from bagel import dictionary_models, mappings, models
 from bagel.utilities import model_utils
 
 
@@ -30,6 +33,30 @@ def _find_by_key(data, target):
     return _find_by_key
 
 
+@pytest.mark.parametrize(
+    "missing_values,expectation",
+    [
+        (["", "999"], does_not_raise()),
+        (
+            ["", "999", "999"],
+            pytest.raises(ValidationError, match="not a unique list"),
+        ),
+    ],
+)
+def test_unique_missing_values_validation(missing_values, expectation):
+    """
+    Test that validate_unique_list() correctly validates a list of missing values in a data dictionary column instance.
+    """
+    with expectation:
+        dictionary_models.Neurobagel(
+            IsAbout={
+                "TermURL": "nb:Sex",
+                "Label": "Sex",
+            },
+            MissingValues=missing_values,
+        )
+
+
 def test_all_used_namespaces_have_urls(
     get_test_context, get_values_by_key, load_test_json, test_data_upload_path
 ):