diff --git a/bagel/pheno_utils.py b/bagel/pheno_utils.py
index e495939..da337ba 100644
--- a/bagel/pheno_utils.py
+++ b/bagel/pheno_utils.py
@@ -193,6 +193,35 @@ def get_transformed_values(
     return transf_val[0]
 
 
+# TODO: Check all columns and then return list of offending columns' names
+def categorical_cols_have_bids_levels(data_dict: dict) -> bool:
+    """
+    Returns False as soon as a categorical column without a BIDS "Levels" attribute is found
+    (a "Levels" value of None counts as absent); returns True otherwise.
+    """
+    return not any(
+        is_column_categorical(name, data_dict) and info.get("Levels") is None
+        for name, info in data_dict.items()
+    )
+
+
+def get_mismatched_categorical_levels(data_dict: dict) -> list:
+    """
+    Returns names of categorical columns whose BIDS "Levels" keys include entries found in
+    neither the "Levels" nor the "MissingValues" of the column's Neurobagel annotations.
+    """
+    flagged_cols = []
+    for col_name, col_attrs in data_dict.items():
+        if not is_column_categorical(col_name, data_dict):
+            continue
+        nb_annotations = col_attrs["Annotations"]
+        nb_levels = list(nb_annotations["Levels"].keys())
+        nb_known = nb_levels + nb_annotations.get("MissingValues", [])
+        if set(col_attrs.get("Levels", {}).keys()).difference(nb_known):
+            flagged_cols.append(col_name)
+    return flagged_cols
+
+
 def are_not_missing(columns: list, row: pd.Series, data_dict: dict) -> bool:
     """
     Checks that all values in the specified columns are not missing values. This is mainly useful
@@ -224,7 +253,7 @@ def find_undefined_cat_col_values(
     all_undefined_values = {}
     for col, attr in data_dict.items():
         if is_column_categorical(col, data_dict):
-            known_values = list(attr["Levels"].keys()) + attr[
+            known_values = list(attr["Annotations"]["Levels"].keys()) + attr[
                 "Annotations"
             ].get("MissingValues", [])
             unknown_values = []
@@ -300,6 +329,16 @@ def validate_inputs(data_dict: dict, pheno_df: pd.DataFrame) -> None:
             "Please make sure that only one column is annotated for participant and session IDs."
         )
 
+    if not categorical_cols_have_bids_levels(data_dict):
+        warnings.warn(
+            "The data dictionary contains at least one column that looks categorical but lacks a BIDS 'Levels' attribute."
+        )
+
+    if mismatched_cols := get_mismatched_categorical_levels(data_dict):
+        warnings.warn(
+            f"The data dictionary contains columns with mismatched levels between the BIDS and Neurobagel annotations: {mismatched_cols}"
+        )
+
     if not are_inputs_compatible(data_dict, pheno_df):
         raise LookupError(
             "The provided data dictionary and phenotypic file are individually valid, "
diff --git a/bagel/tests/data/README.md b/bagel/tests/data/README.md
index 8b35025..a14268f 100644
--- a/bagel/tests/data/README.md
+++ b/bagel/tests/data/README.md
@@ -16,13 +16,15 @@ Example inputs to the CLI
 | 9            | invalid, based on example 6 but contains an unannotated value for `group`                                                                                                | valid, based on example 6                                                            | fail               |
 | 10           | valid, same as example 6                                                                                                                                                 | valid, based on example 6 but contains extra `"MissingValues"` not found in the .tsv | pass, with warning |
 | synthetic    | valid, has `participant` and `session` IDs corresponding to the [`synthetic` example BIDS dataset](https://github.com/bids-standard/bids-examples/tree/master/synthetic) | valid                                                                                | pass               |
-| 11           | invalid, ex 6 with missing entries in `participant_id` and `session_id` columns                                                                                                           | valid, based on example 6                                                            | fail               |
+| 11           | invalid, ex 6 with missing entries in `participant_id` and `session_id` columns                                                                                          | valid, based on example 6                                                            | fail               |
+| 12           | valid, same as example 2                                                                                                                                                 | valid, based on example 2 but missing BIDS "Levels" attribute for group column       | pass, with warning |
+| 13           | valid, same as example_synthetic                                                                                                                                         | valid, based on example_synthetic but with mismatched levels for `pheno_sex` column  | pass, with warning |
 
 `* this is expected to fail until we enable multiple participant_ID handling`.
 
 ## Example expected CLI outputs
 
-| Example                             | Description                                                                                                                                                                                                                                                                                                                         |
-| ----------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| example_synthetic.jsonld            | Sample output of `bagel pheno` command on the `synthetic` example inputs. Contains subject-level annotated phenotypic attributes obtained by essentially applying the data dictionary (.json) to the original tabular data (.tsv).                                                                                                  |
+| Example                             | Description                                                                                                                                                                                                                                                                                                                                                           |
+|-------------------------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| example_synthetic.jsonld            | Sample output of `bagel pheno` command on the `synthetic` example inputs. Contains subject-level annotated phenotypic attributes obtained by essentially applying the data dictionary (.json) to the original tabular data (.tsv).                                                                                                                                    |
 | example_synthetic_pheno-bids.jsonld | Sample output of `bagel bids` command on the example_synthetic.jsonld and the [BIDS `synthetic` example dataset directory](https://github.com/bids-standard/bids-examples/tree/master/synthetic). Contains subject-level annotated imaging metadata (extracted directly from the BIDS dataset structure) on top of the subject-level annotated phenotypic attributes. |
diff --git a/bagel/tests/data/example12.json b/bagel/tests/data/example12.json
new file mode 100644
index 0000000..e9b7d3a
--- /dev/null
+++ b/bagel/tests/data/example12.json
@@ -0,0 +1,75 @@
+{
+  "participant_id": {
+    "Description": "A participant ID",
+    "Annotations": {
+      "IsAbout": {
+        "TermURL": "nb:ParticipantID",
+        "Label": "Unique participant identifier"
+      }
+    }
+  },
+  "session_id": {
+    "Description": "A session ID",
+    "Annotations": {
+      "IsAbout": {
+        "TermURL": "nb:SessionID",
+        "Label": "Unique session identifier"
+      }
+    }
+  },
+  "group": {
+    "Description": "Group variable",
+    "Annotations": {
+      "IsAbout": {
+        "TermURL": "nb:Diagnosis",
+        "Label": "Diagnosis"
+      },
+      "Levels": {
+        "PAT": {
+          "TermURL": "snomed:49049000",
+          "Label": "Parkinson's disease"
+        },
+        "CTRL": {
+          "TermURL": "purl:NCIT_C94342",
+          "Label": "Healthy Control"
+        }
+      }
+    }
+  },
+  "sex": {
+    "Description": "Sex variable",
+    "Levels": {
+      "M": "Male",
+      "F": "Female"
+    },
+    "Annotations": {
+      "IsAbout": {
+        "TermURL": "nb:Sex",
+        "Label": "Sex"
+      },
+      "Levels": {
+        "M": {
+          "TermURL": "snomed:248153007",
+          "Label": "Male"
+        },
+        "F": {
+          "TermURL": "snomed:248152002",
+          "Label": "Female"
+        }
+      }
+    }
+  },
+  "participant_age": {
+    "Description": "Age of the participant",
+    "Annotations": {
+      "IsAbout": {
+        "TermURL": "nb:Age",
+        "Label": "Chronological age"
+      },
+      "Transformation": {
+        "TermURL": "nb:iso8601",
+        "Label": "A period of time defined according to the ISO8601 standard"
+      }
+    }
+  }
+}
\ No newline at end of file
diff --git a/bagel/tests/data/example12.tsv b/bagel/tests/data/example12.tsv
new file mode 100644
index 0000000..705ad3b
--- /dev/null
+++ b/bagel/tests/data/example12.tsv
@@ -0,0 +1,5 @@
+participant_id	session_id	group	sex	participant_age
+sub-01	ses-01	PAT	M	"P20Y6M"
+sub-01	ses-02	PAT	M	"P20Y8M"
+sub-02	ses-01	CTRL	F	"P25Y8M"
+sub-02	ses-02	CTRL	F	"P26Y4M"
diff --git a/bagel/tests/data/example13.json b/bagel/tests/data/example13.json
new file mode 100644
index 0000000..f240156
--- /dev/null
+++ b/bagel/tests/data/example13.json
@@ -0,0 +1,128 @@
+{
+    "participant_id": {
+        "Description": "A participant ID",
+        "Annotations": {
+            "IsAbout": {
+                "TermURL": "nb:ParticipantID",
+                "Label": "Unique participant identifier"
+            }
+        }
+    },
+    "session_id": {
+        "Description": "A session ID",
+        "Annotations": {
+            "IsAbout": {
+                "TermURL": "nb:SessionID",
+                "Label": "Unique session identifier"
+            }
+        }
+    },
+      "pheno_age": {
+    "Description": "Age of the participant",
+    "Annotations": {
+      "IsAbout": {
+        "TermURL": "nb:Age",
+        "Label": "Chronological age"
+      },
+      "Transformation": {
+        "TermURL": "nb:euro",
+        "Label": "writing the time with a comma - why not"
+      },
+      "MissingValues": ["NA"]
+    }
+  },
+    "pheno_sex": {
+    "Description": "Sex variable",
+    "Levels": {
+      "M": "Male",
+      "F": "Female",
+      "missing": "Missing sex",
+      "O": "Other unimportant level"
+    },
+    "Annotations": {
+      "IsAbout": {
+        "TermURL": "nb:Sex",
+        "Label": "Sex"
+      },
+      "Levels": {
+        "M": {
+          "TermURL": "snomed:248153007",
+          "Label": "Male"
+        },
+        "F": {
+          "TermURL": "snomed:248152002",
+          "Label": "Female"
+        }
+      },
+      "MissingValues": ["missing"]
+    }
+  },
+
+    "pheno_group": {
+        "Description": "Group variable",
+        "Levels": {
+            "PAT": "Patient",
+            "CTRL": "Control subject",
+            "NA": "Missing group"
+        },
+        "Annotations": {
+            "IsAbout": {
+                "TermURL": "nb:Diagnosis",
+                "Label": "Diagnosis"
+            },
+            "Levels": {
+                "PAT": {
+                    "TermURL": "snomed:49049000",
+                    "Label": "Parkinson's disease"
+                },
+                "CTRL": {
+                    "TermURL": "purl:NCIT_C94342",
+                    "Label": "Healthy Control"
+                }
+            },
+            "MissingValues": ["NA"]
+        }
+    },
+    "tool1_item1": {
+        "Description": "item 1 scores for tool1",
+        "Annotations": {
+            "IsAbout": {
+                "TermURL": "nb:Assessment",
+                "Label": "Assessment tool"
+            },
+            "IsPartOf": {
+                "TermURL": "cogatlas:1234",
+                "Label": "Imaginary tool"
+            },
+            "MissingValues": ["missing"]
+        }
+    },
+    "tool1_item2": {
+        "Description": "item 2 scores for tool1",
+        "Annotations": {
+            "IsAbout": {
+                "TermURL": "nb:Assessment",
+                "Label": "Assessment tool"
+            },
+            "IsPartOf": {
+                "TermURL": "cogatlas:1234",
+                "Label": "Imaginary tool"
+            },
+            "MissingValues": ["missing"]
+        }
+    },
+    "tool2_item1": {
+        "Description": "item 1 scores for tool2",
+        "Annotations": {
+            "IsAbout": {
+                "TermURL": "nb:Assessment",
+                "Label": "Assessment tool"
+            },
+            "IsPartOf": {
+                "TermURL": "cogatlas:4321",
+                "Label": "A different imaginary tool"
+            },
+            "MissingValues": ["not completed"]
+        }
+    }
+}
\ No newline at end of file
diff --git a/bagel/tests/data/example13.tsv b/bagel/tests/data/example13.tsv
new file mode 100644
index 0000000..675aa6b
--- /dev/null
+++ b/bagel/tests/data/example13.tsv
@@ -0,0 +1,11 @@
+participant_id	session_id	pheno_age	pheno_sex	pheno_group	tool1_item1	tool1_item2	tool2_item1
+sub-01	ses-01	34,1	F	CTRL	good	far	hello
+sub-01	ses-02	35,3	F	CTRL	bad	near	world
+sub-02	ses-01	NA	M	PAT	ok	missing	hello
+sub-02	ses-02	39,0	M	PAT	good	middle	friends
+sub-03	ses-01	22,1	missing	NA	bad	near	not completed
+sub-03	ses-02	23,2	missing	PAT	ok	far	hello
+sub-04	ses-01	21,1	F	CTRL	missing	missing	hello
+sub-04	ses-02	22,3	F	CTRL	good	middle	friends
+sub-05	ses-01	42,5	M	PAT	bad	near	friends
+sub-05	ses-02	43,2	M	PAT	good	far	world
\ No newline at end of file
diff --git a/bagel/tests/test_cli_pheno.py b/bagel/tests/test_cli_pheno.py
index 414b8e6..39d316d 100644
--- a/bagel/tests/test_cli_pheno.py
+++ b/bagel/tests/test_cli_pheno.py
@@ -4,7 +4,15 @@
 
 
 @pytest.mark.parametrize(
-    "example", ["example2", "example4", "example6", "example_synthetic"]
+    "example",
+    [
+        "example2",
+        "example4",
+        "example6",
+        "example12",
+        "example13",
+        "example_synthetic",
+    ],
 )
 def test_pheno_valid_inputs_run_successfully(
     runner, test_data, tmp_path, example
@@ -123,6 +131,66 @@ def test_invalid_portal_uris_produces_error(
     )
 
 
+def test_missing_bids_levels_raises_warning(runner, test_data, tmp_path):
+    """
+    Tests that a categorical column lacking BIDS "Levels" only triggers a warning.
+    """
+    with pytest.warns(UserWarning) as w:
+        result = runner.invoke(
+            bagel,
+            [
+                "pheno",
+                "--pheno",
+                test_data / "example12.tsv",
+                "--dictionary",
+                test_data / "example12.json",
+                "--output",
+                tmp_path,
+                "--name",
+                "testing dataset",
+            ],
+            catch_exceptions=False,
+        )
+
+    assert result.exit_code == 0, f"Errored out. STDOUT: {result.output}"
+    assert len(w) == 1
+    assert "looks categorical but lacks a BIDS 'Levels' attribute" in str(
+        w[0].message.args[0]
+    )
+
+
+def test_bids_neurobagel_levels_mismatch_raises_warning(
+    runner, test_data, tmp_path
+):
+    """
+    Tests that a warning naming the offending column is raised when a column's BIDS "Levels"
+    contain entries missing from its Neurobagel annotations, and that the command succeeds.
+    """
+    with pytest.warns(UserWarning) as w:
+        result = runner.invoke(
+            bagel,
+            [
+                "pheno",
+                "--pheno",
+                test_data / "example13.tsv",
+                "--dictionary",
+                test_data / "example13.json",
+                "--output",
+                tmp_path,
+                "--name",
+                "testing dataset",
+            ],
+            catch_exceptions=False,
+        )
+
+    assert result.exit_code == 0, f"Errored out. STDOUT: {result.output}"
+    assert len(w) == 1
+    warning_text = str(w[0].message.args[0])
+    assert "columns with mismatched levels" in warning_text
+    # Only pheno_sex is flagged: its BIDS level "O" has no Neurobagel annotation.
+    assert "['pheno_sex']" in warning_text
+
+
 def test_unused_missing_values_raises_warning(
     runner,
     test_data,