diff --git a/.github/workflows/pr_checks.yml b/.github/workflows/pr_checks.yml index d6719934..0ab63d87 100644 --- a/.github/workflows/pr_checks.yml +++ b/.github/workflows/pr_checks.yml @@ -93,9 +93,9 @@ jobs: Pytest-Matrix: strategy: matrix: - python_version: ["3.8", "3.9", "3.10", "3.11"] - pydantic_version: ["1.8.2", "1.9.2", "1.10.9"] - pyyaml_version: ["5.4.1", "6.0"] + python_version: ["3.9", "3.10", "3.11"] + pydantic_version: ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] + pyyaml_version: ["5.4.1", "6.0.1"] runs-on: ubuntu-latest continue-on-error: true steps: diff --git a/CHANGELOG.md b/CHANGELOG.md index d52970a3..f0dea563 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,9 +24,16 @@ The types of changes are: - Give Flexible Legal Basis a default of True [#184](https://github.com/ethyca/fideslang/pull/184) +## [3.0.0 Unreleased] + +### Changed + +- Updated to Pydantic 2.X, which is now the only supported version [#160](https://github.com/ethyca/fideslang/pull/160) +- Removed Python 3.8 from the list of supported versions [#160](https://github.com/ethyca/fideslang/pull/160) + ## [2.2.1](https://github.com/ethyca/fideslang/compare/2.2.0...2.2.1) -### Added +### Added - Added a `System.cookies` property to support `Cookie` records explicitly associated with a `System` generally [#181](https://github.com/ethyca/fideslang/pull/181) - Added a `System.previous_vendor_id` property to support to associate a `System` record with a "deprecated" vendor record [#182](https://github.com/ethyca/fideslang/pull/182) diff --git a/Dockerfile b/Dockerfile index 87c40171..5b0aa15c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.8-slim-bullseye as base +FROM python:3.9-slim-bullseye as base # Update pip in the base image since we'll use it everywhere RUN pip install -U pip diff --git a/dev-requirements.txt b/dev-requirements.txt index f91fc28a..3bdcd709 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,7 +2,7 @@ black==23.3.0 mypy==1.4.0 nox>=2023 packaging>=22.0 -pre-commit==2.9.3 +pre-commit==3.5.0 pylint==2.10.0 pytest==7.3.1 pytest-cov==2.11.1 diff --git a/noxfile.py b/noxfile.py index 2b0dd342..f14f82c4 100644 --- a/noxfile.py +++ b/noxfile.py @@ -3,9 +3,10 @@ nox.options.sessions = [] nox.options.reuse_existing_virtualenvs = True -TESTED_PYTHON_VERSIONS = ["3.8", "3.9", "3.10", "3.11"] -TESTED_PYDANTIC_VERSIONS = ["1.8.2", "1.9.2", "1.10.9"] -TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0"] +# These should match what is in the `pr_checks.yml` file for CI runs +TESTED_PYTHON_VERSIONS = ["3.9", "3.10", "3.11"] +TESTED_PYDANTIC_VERSIONS = ["2.2.1", "2.3.0", "2.4.2", "2.5.0"] +TESTED_PYYAML_VERSIONS = ["5.4.1", "6.0.1"] def install_requirements(session: nox.Session) -> None: diff --git a/pyproject.toml b/pyproject.toml index 47a26082..c9befed7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,13 +9,12 @@ name = "fideslang" description = "Fides Taxonomy Language" dynamic = ["dependencies", "version"] readme = "README.md" -requires-python = ">=3.8, <4" +requires-python = ">=3.9, <4" authors = [{ name = "Ethyca, Inc.", email = "fidesteam@ethyca.com" }] license = { text = "Apache License 2.0" } classifiers = [ "License :: OSI Approved :: Apache Software License", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", diff --git a/requirements.txt b/requirements.txt index cc280885..82fdc338 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -pydantic>=1.8.1,<1.11.0 +pydantic>=2.2.1,<=2.6.0 pyyaml>=5,<7 packaging>=20.0 diff --git a/src/fideslang/__init__.py b/src/fideslang/__init__.py index 9d18a523..ce1c9adb 100644 --- a/src/fideslang/__init__.py +++ b/src/fideslang/__init__.py @@ -33,7 +33,6 @@ DataSubject, DataUse, Evaluation, - FidesCollectionKey, FidesDatasetReference, FidesMeta, FidesModel, @@ -46,6 +45,7 @@ System, Taxonomy, ) +from .validation import FidesCollectionKey FidesModelType = Union[Type[FidesModel], Type[Evaluation]] model_map: Dict[str, FidesModelType] = { diff --git a/src/fideslang/default_taxonomy/utils.py b/src/fideslang/default_taxonomy/utils.py index f97cc94f..3550170e 100644 --- a/src/fideslang/default_taxonomy/utils.py +++ b/src/fideslang/default_taxonomy/utils.py @@ -18,5 +18,5 @@ def default_factory(taxonomy_class: CustomType, **kwargs: Dict) -> CustomType: # This is the version where we started tracking from, so # we use it as the default starting point. kwargs["version_added"] = "2.0.0" # type: ignore[assignment] - item = taxonomy_class.parse_obj(kwargs) + item = taxonomy_class.model_validate(obj=kwargs) return item diff --git a/src/fideslang/gvl/__init__.py b/src/fideslang/gvl/__init__.py index 2a298eab..ec1c7d0a 100644 --- a/src/fideslang/gvl/__init__.py +++ b/src/fideslang/gvl/__init__.py @@ -50,16 +50,16 @@ def _load_data() -> None: ) as mapping_file: data = load(mapping_file) for raw_purpose in data["purposes"].values(): - purpose = Purpose.parse_obj(raw_purpose) - mapped_purpose = MappedPurpose.parse_obj(raw_purpose) + purpose = Purpose.model_validate(raw_purpose) + mapped_purpose = MappedPurpose.model_validate(raw_purpose) GVL_PURPOSES[purpose.id] = purpose MAPPED_PURPOSES[mapped_purpose.id] = mapped_purpose for data_use in mapped_purpose.data_uses: MAPPED_PURPOSES_BY_DATA_USE[data_use] = mapped_purpose for raw_special_purpose in data["specialPurposes"].values(): - special_purpose = Purpose.parse_obj(raw_special_purpose) - mapped_special_purpose = MappedPurpose.parse_obj(raw_special_purpose) + special_purpose = Purpose.model_validate(raw_special_purpose) + mapped_special_purpose = MappedPurpose.model_validate(raw_special_purpose) GVL_SPECIAL_PURPOSES[special_purpose.id] = special_purpose MAPPED_SPECIAL_PURPOSES[mapped_special_purpose.id] = mapped_special_purpose for data_use in mapped_special_purpose.data_uses: @@ -71,12 +71,12 @@ def _load_data() -> None: feature_data = load(feature_mapping_file) for raw_feature in feature_data["features"].values(): - feature = Feature.parse_obj(raw_feature) + feature = Feature.model_validate(raw_feature) GVL_FEATURES[feature.id] = feature FEATURES_BY_NAME[feature.name] = feature for raw_special_feature in feature_data["specialFeatures"].values(): - special_feature = Feature.parse_obj(raw_special_feature) + special_feature = Feature.model_validate(raw_special_feature) GVL_SPECIAL_FEATURES[special_feature.id] = special_feature FEATURES_BY_NAME[special_feature.name] = special_feature @@ -86,8 +86,8 @@ def _load_data() -> None: data_category_data = load(data_category_mapping_file) for raw_data_category in data_category_data.values(): - data_category = GVLDataCategory.parse_obj(raw_data_category) - mapped_data_category = MappedDataCategory.parse_obj(raw_data_category) + data_category = GVLDataCategory.model_validate(raw_data_category) + mapped_data_category = MappedDataCategory.model_validate(raw_data_category) GVL_DATA_CATEGORIES[data_category.id] = data_category MAPPED_GVL_DATA_CATEGORIES[mapped_data_category.id] = mapped_data_category diff --git a/src/fideslang/models.py b/src/fideslang/models.py index 350caf17..670958eb 100644 --- a/src/fideslang/models.py +++ b/src/fideslang/models.py @@ -6,27 +6,27 @@ from __future__ import annotations from enum import Enum -from typing import Any, Dict, List, Optional, Union +from typing import Dict, List, Optional, Union from warnings import warn +from packaging.version import InvalidVersion, Version from pydantic import ( - AnyUrl, BaseModel, - ConstrainedStr, + ConfigDict, Field, - HttpUrl, + ValidationInfo, PositiveInt, - root_validator, - validator, + field_validator, + model_validator, ) from fideslang.validation import ( + FidesCollectionKey, FidesKey, - FidesVersion, + FidesValidationError, check_valid_country_code, deprecated_version_later_than_added, has_versioning_if_default, - is_deprecated_if_replaced, matching_parent_key, no_self_reference, parse_data_type_string, @@ -36,29 +36,14 @@ ) # Reusable Validators -country_code_validator = validator("third_country_transfers", allow_reuse=True)( +country_code_validator = field_validator("third_country_transfers")( check_valid_country_code ) -matching_parent_key_validator = validator("parent_key", allow_reuse=True, always=True)( - matching_parent_key -) -no_self_reference_validator = validator("parent_key", allow_reuse=True)( - no_self_reference -) -has_versioning_if_default_validator = validator( - "is_default", allow_reuse=True, always=True -)(has_versioning_if_default) -deprecated_version_later_than_added_validator = validator( - "version_deprecated", allow_reuse=True -)(deprecated_version_later_than_added) -is_deprecated_if_replaced_validator = validator("replaced_by", allow_reuse=True)( - is_deprecated_if_replaced -) # Reusable Fields name_field = Field(description="Human-Readable name for this resource.") description_field = Field( - description="A detailed description of what this resource is." + default=None, description="A detailed description of what this resource is." ) meta_field = Field( default=None, @@ -77,13 +62,13 @@ class FidesModel(BaseModel): description="Defines the Organization that this resource belongs to.", ) tags: Optional[List[str]] = None - name: Optional[str] = name_field - description: Optional[str] = description_field - - class Config: - "Config for the FidesModel" - extra = "ignore" - orm_mode = True + name: Optional[str] = Field( + default=None, description="Human-Readable name for this resource." + ) + description: Optional[str] = Field( + default=None, description="A detailed description of what this resource is." + ) + model_config = ConfigDict(extra="ignore", from_attributes=True) class DefaultModel(BaseModel): @@ -108,39 +93,42 @@ class DefaultModel(BaseModel): description="Denotes whether the resource is part of the default taxonomy or not.", ) - _has_versioning_if_default: classmethod = has_versioning_if_default_validator - _deprecated_version_later_than_added: classmethod = ( - deprecated_version_later_than_added_validator - ) - _is_deprecated_if_replaced: classmethod = is_deprecated_if_replaced_validator + @model_validator(mode="after") + def verify_version_info(self) -> "DefaultModel": + """Compose all of the version checks into a single validator.""" + version_added = self.version_added + version_deprecated = self.version_deprecated + replaced_by = self.replaced_by + is_default = self.is_default + + if version_added: + try: + Version(version_added) + except InvalidVersion: + raise FidesValidationError( + f"Field 'version_added' does not have a valid version: {version_added}" + ) - @validator("version_added") - @classmethod - def validate_verion_added( - cls, version_added: Optional[str], values: Dict - ) -> Optional[str]: - """ - Validate that the `version_added` field is a proper FidesVersion - """ - if not version_added: - return None + if version_deprecated: + try: + Version(version_deprecated) + except InvalidVersion: + raise FidesValidationError( + f"Field 'version_deprecated' does not have a valid version: {version_deprecated}" + ) - FidesVersion.validate(version_added) - return version_added + deprecated_version_later_than_added( + Version(version_deprecated), version_added + ) - @validator("version_deprecated") - @classmethod - def validate_version_deprecated( - cls, version_deprecated: Optional[str], values: Dict - ) -> Optional[str]: - """ - Validate that the `version_deprecated` is a proper FidesVersion - """ - if not version_deprecated: - return None + has_versioning_if_default( + is_default, version_added, version_deprecated, replaced_by + ) - FidesVersion.validate(version_deprecated) - return version_deprecated + if replaced_by and not version_deprecated: + raise FidesValidationError("Cannot be replaced without deprecation!") + + return self class DataResponsibilityTitle(str, Enum): @@ -280,23 +268,27 @@ class SpecialCategoryLegalBasisEnum(str, Enum): class DataCategory(FidesModel, DefaultModel): """The DataCategory resource model.""" - parent_key: Optional[FidesKey] + parent_key: Optional[FidesKey] = None + + @model_validator(mode="after") + def parent_key_checks(self) -> "DataCategory": + """Verify that the parent key is valid.""" + fides_key = self.fides_key + parent_key = self.parent_key - _matching_parent_key: classmethod = matching_parent_key_validator - _no_self_reference: classmethod = no_self_reference_validator + no_self_reference(parent_key=parent_key, fides_key=fides_key) + matching_parent_key(parent_key=parent_key, fides_key=fides_key) + + return self class Cookies(BaseModel): """The Cookies resource model""" name: str - path: Optional[str] - domain: Optional[str] - - class Config: - """Config for the cookies""" - - orm_mode = True + path: Optional[str] = None + domain: Optional[str] = None + model_config = ConfigDict(from_attributes=True) class DataSubjectRights(BaseModel): @@ -312,29 +304,32 @@ class DataSubjectRights(BaseModel): description="Defines the strategy used when mapping data rights to a data subject.", ) values: Optional[List[DataSubjectRightsEnum]] = Field( + default=None, description="A list of valid data subject rights to be used when applying data rights to a data subject via a strategy.", ) - @root_validator() - @classmethod - def include_exclude_has_values(cls, values: Dict) -> Dict: + @model_validator(mode="after") + def include_exclude_has_values(self) -> "DataSubjectRights": """ Validate the if include or exclude is chosen, that at least one value is present. """ - strategy, rights = values.get("strategy"), values.get("values") + strategy, rights = self.strategy, self.values if strategy in ("INCLUDE", "EXCLUDE"): assert ( rights is not None ), f"If {strategy} is chosen, rights must also be listed." - return values + return self class DataSubject(FidesModel, DefaultModel): """The DataSubject resource model.""" - rights: Optional[DataSubjectRights] = Field(description=DataSubjectRights.__doc__) + rights: Optional[DataSubjectRights] = Field( + default=None, description=DataSubjectRights.__doc__ or "" + ) automated_decisions_or_profiling: Optional[bool] = Field( + default=None, description="A boolean value to annotate whether or not automated decisions/profiling exists for the data subject.", ) @@ -344,60 +339,56 @@ class DataUse(FidesModel, DefaultModel): parent_key: Optional[FidesKey] = None legal_basis: Optional[LegalBasisEnum] = Field( + default=None, description="Deprecated. The legal basis category of which the data use falls under. This field is used as part of the creation of an exportable data map.", ) special_category: Optional[SpecialCategoriesEnum] = Field( + default=None, description="Deprecated. The special category for processing of which the data use falls under. This field is used as part of the creation of an exportable data map.", ) recipients: Optional[List[str]] = Field( + default=None, description="Deprecated. An array of recipients when sharing personal data outside of your organization.", ) legitimate_interest: Optional[bool] = Field( + default=None, description="Deprecated. A boolean representation of if the legal basis used is `Legitimate Interest`. Validated at run time and looks for a `legitimate_interest_impact_assessment` to exist if true.", ) - legitimate_interest_impact_assessment: Optional[AnyUrl] = Field( + legitimate_interest_impact_assessment: Optional[str] = Field( + default=None, description="Deprecated. A url pointing to the legitimate interest impact assessment. Required if the legal bases used is legitimate interest.", ) - _matching_parent_key: classmethod = matching_parent_key_validator - _no_self_reference: classmethod = no_self_reference_validator + @model_validator(mode="after") + def parent_key_checks(self) -> "DataUse": + """Verify that the parent key is valid.""" + fides_key = self.fides_key + parent_key = self.parent_key - @root_validator - @classmethod - def deprecate_fields(cls, values: Dict) -> Dict: - """ - Warn of Data Use fields pending deprecation. - """ - deprecated_fields = [ - "legal_basis", - "recipients", - "special_category", - "legitimate_interest", - "legitimate_interest_impact_assessment", - ] - for field in deprecated_fields: - if values.get(field) is not None: - warn( - f"The {field} field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - return values + no_self_reference(parent_key=parent_key, fides_key=fides_key) + matching_parent_key(parent_key=parent_key, fides_key=fides_key) - @validator("legitimate_interest", always=True) + return self + + @field_validator("legitimate_interest") @classmethod - def set_legitimate_interest(cls, value: bool, values: Dict) -> bool: + def set_legitimate_interest(cls, value: bool, info: ValidationInfo) -> bool: """Sets if a legitimate interest is used.""" + values = info.data + if values["legal_basis"] == "Legitimate Interests": value = True return value - @validator("legitimate_interest_impact_assessment", always=True) + @field_validator("legitimate_interest_impact_assessment") @classmethod - def ensure_impact_assessment(cls, value: AnyUrl, values: Dict) -> AnyUrl: + def ensure_impact_assessment(cls, value: str, info: ValidationInfo) -> str: """ Validates an impact assessment is applied if a legitimate interest has been defined. """ + values = info.data + if values["legitimate_interest"]: assert ( value is not None @@ -431,9 +422,11 @@ class MyDatasetField(DatasetFieldBase): name: str = name_field description: Optional[str] = description_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Arrays of Data Categories, identified by `fides_key`, that applies to this field.", ) retention: Optional[str] = Field( + default=None, description="An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", ) @@ -450,36 +443,42 @@ class FidesDatasetReference(BaseModel): dataset: FidesKey field: str - direction: Optional[EdgeDirection] + direction: Optional[EdgeDirection] = None class FidesMeta(BaseModel): """Supplementary metadata used by the Fides application for additional features.""" references: Optional[List[FidesDatasetReference]] = Field( - description="Fields that current field references or is referenced by. Used for drawing the edges of a DSR graph.", default=None, + description="Fields that current field references or is referenced by. Used for drawing the edges of a DSR graph.", ) identity: Optional[str] = Field( - description="The type of the identity data that should be used to query this collection for a DSR." + default=None, + description="The type of the identity data that should be used to query this collection for a DSR.", ) primary_key: Optional[bool] = Field( - description="Whether the current field can be considered a primary key of the current collection" + default=None, + description="Whether the current field can be considered a primary key of the current collection", ) data_type: Optional[str] = Field( - description="Optionally specify the data type. Fides will attempt to cast values to this type when querying." + default=None, + description="Optionally specify the data type. Fides will attempt to cast values to this type when querying.", ) length: Optional[PositiveInt] = Field( - description="Optionally specify the allowable field length. Fides will not generate values that exceed this size." + default=None, + description="Optionally specify the allowable field length. Fides will not generate values that exceed this size.", ) return_all_elements: Optional[bool] = Field( - description="Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False." + default=None, + description="Optionally specify to query for the entire array if the array is an entrypoint into the node. Default is False.", ) read_only: Optional[bool] = Field( - description="Optionally specify if a field is read-only, meaning it can't be updated or deleted." + default=None, + description="Optionally specify if a field is read-only, meaning it can't be updated or deleted.", ) - @validator("data_type") + @field_validator("data_type") @classmethod def valid_data_type(cls, value: Optional[str]) -> Optional[str]: """Validate that all annotated data types exist in the taxonomy""" @@ -511,10 +510,11 @@ class DatasetField(DatasetFieldBase, FidesopsMetaBackwardsCompat): fides_meta: Optional[FidesMeta] = None fields: Optional[List[DatasetField]] = Field( + default=None, description="An optional array of objects that describe hierarchical/nested fields (typically found in NoSQL databases).", ) - @validator("fides_meta") + @field_validator("fides_meta") @classmethod def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: """Validate upfront that the return_all_elements flag can only be specified on array fields""" @@ -530,17 +530,18 @@ def valid_meta(cls, meta_values: Optional[FidesMeta]) -> Optional[FidesMeta]: ) return meta_values - @validator("fields") + @field_validator("fields") @classmethod - def validate_object_fields( # type: ignore + def validate_object_fields( cls, fields: Optional[List["DatasetField"]], - values: Dict[str, Any], + info: ValidationInfo, ) -> Optional[List["DatasetField"]]: """Two validation checks for object fields: - If there are sub-fields specified, type should be either empty or 'object' - Additionally object fields cannot have data_categories. """ + values = info.data declared_data_type = None field_name: str = values.get("name") # type: ignore @@ -550,12 +551,12 @@ def validate_object_fields( # type: ignore if fields and declared_data_type: data_type, _ = parse_data_type_string(declared_data_type) if data_type != "object": - raise ValueError( + raise FidesValidationError( f"The data type '{data_type}' on field '{field_name}' is not compatible with specified sub-fields. Convert to an 'object' field." ) if (fields or declared_data_type == "object") and values.get("data_categories"): - raise ValueError( + raise FidesValidationError( f"Object field '{field_name}' cannot have specified data_categories. Specify category on sub-field instead" ) @@ -563,34 +564,13 @@ def validate_object_fields( # type: ignore # this is required for the recursive reference in the pydantic model: -DatasetField.update_forward_refs() - - -class FidesCollectionKey(ConstrainedStr): - """ - Dataset.Collection name where both dataset and collection names are valid FidesKeys - """ - - @classmethod - def validate(cls, value: str) -> str: - """ - Overrides validation to check FidesCollectionKey format, and that both the dataset - and collection names have the FidesKey format. - """ - values = value.split(".") - if len(values) == 2: - FidesKey.validate(values[0]) - FidesKey.validate(values[1]) - return value - raise ValueError( - "FidesCollection must be specified in the form 'FidesKey.FidesKey'" - ) +DatasetField.model_rebuild() class CollectionMeta(BaseModel): """Collection-level specific annotations used for query traversal""" - after: Optional[List[FidesCollectionKey]] + after: Optional[List[FidesCollectionKey]] = None skip_processing: Optional[bool] = False @@ -604,23 +584,21 @@ class DatasetCollection(FidesopsMetaBackwardsCompat): name: str = name_field description: Optional[str] = description_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Array of Data Category resources identified by `fides_key`, that apply to all fields in the collection.", ) retention: Optional[str] = Field( + default=None, description="An optional string to describe the retention policy for a Dataset collection. This field can also be applied more granularly at the field level of a Dataset.", ) fields: List[DatasetField] = Field( description="An array of objects that describe the collection's fields.", ) - fides_meta: Optional[CollectionMeta] = None + fides_meta: Optional[CollectionMeta] = Field(default=None) - _sort_fields: classmethod = validator("fields", allow_reuse=True)( - sort_list_objects_by_name - ) - _unique_items_in_list: classmethod = validator("fields", allow_reuse=True)( - unique_items_in_list - ) + _sort_fields = field_validator("fields")(sort_list_objects_by_name) + _unique_items_in_list = field_validator("fields")(unique_items_in_list) class ContactDetails(BaseModel): @@ -659,8 +637,8 @@ class DatasetMetadata(BaseModel): Object used to hold application specific metadata for a dataset """ - resource_id: Optional[str] - after: Optional[List[FidesKey]] + resource_id: Optional[str] = None + after: Optional[List[FidesKey]] = None class Dataset(FidesModel, FidesopsMetaBackwardsCompat): @@ -668,51 +646,32 @@ class Dataset(FidesModel, FidesopsMetaBackwardsCompat): meta: Optional[Dict] = meta_field data_categories: Optional[List[FidesKey]] = Field( + default=None, description="Array of Data Category resources identified by `fides_key`, that apply to all collections in the Dataset.", ) fides_meta: Optional[DatasetMetadata] = Field( - description=DatasetMetadata.__doc__, default=None + default=None, + description=DatasetMetadata.__doc__, ) joint_controller: Optional[ContactDetails] = Field( - description="Deprecated. " + ContactDetails.__doc__, + default=None, + description="Deprecated. " + (ContactDetails.__doc__ or ""), ) retention: Optional[str] = Field( + default=None, description="Deprecated. An optional string to describe the retention policy for a dataset. This field can also be applied more granularly at either the Collection or field level of a Dataset.", ) third_country_transfers: Optional[List[str]] = Field( + default=None, description="Deprecated. An optional array to identify any third countries where data is transited to. For consistency purposes, these fields are required to follow the Alpha-3 code set in [ISO 3166-1](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3).", ) collections: List[DatasetCollection] = Field( description="An array of objects that describe the Dataset's collections.", ) - _sort_collections: classmethod = validator("collections", allow_reuse=True)( - sort_list_objects_by_name - ) - _check_valid_country_code: classmethod = country_code_validator - _unique_items_in_list: classmethod = validator("collections", allow_reuse=True)( - unique_items_in_list - ) - - @root_validator - @classmethod - def deprecate_fields(cls, values: Dict) -> Dict: - """ - Warn of Dataset fields pending deprecation. - """ - # TODO: Do we want to remove these for Fideslang 3? - deprecated_fields = [ - "joint_controller", - "retention", - "third_country_transfers", - ] - for field in deprecated_fields: - if values.get(field) is not None: - warn( - f"The {field} field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - return values + _sort_collections = field_validator("collections")(sort_list_objects_by_name) + _check_valid_country_code = country_code_validator + _unique_items_in_list = field_validator("collections")(unique_items_in_list) # Evaluation @@ -767,11 +726,7 @@ class Evaluation(BaseModel): default="", description="A human-readable string response for the evaluation.", ) - - class Config: - "Config for the Evaluation" - extra = "ignore" - orm_mode = True + model_config = ConfigDict(extra="ignore", from_attributes=True) # Organization @@ -796,7 +751,8 @@ class OrganizationMetadata(BaseModel): """ resource_filters: Optional[List[ResourceFilter]] = Field( - description="A list of filters that can be used when generating or scanning systems." + default=None, + description="A list of filters that can be used when generating or scanning systems.", ) @@ -813,19 +769,23 @@ class Organization(FidesModel): description="An inherited field from the FidesModel that is unused with an Organization.", ) controller: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) data_protection_officer: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) fidesctl_meta: Optional[OrganizationMetadata] = Field( + default=None, description=OrganizationMetadata.__doc__, ) representative: Optional[ContactDetails] = Field( + default=None, description=ContactDetails.__doc__, ) - security_policy: Optional[HttpUrl] = Field( - description="Am optional URL to the organization security policy." + security_policy: Optional[str] = Field( + default=None, description="An optional URL to the organization security policy." ) @@ -888,9 +848,7 @@ class Policy(FidesModel): description=PolicyRule.__doc__, ) - _sort_rules: classmethod = validator("rules", allow_reuse=True)( - sort_list_objects_by_name - ) + _sort_rules = field_validator("rules")(sort_list_objects_by_name) # Registry @@ -921,9 +879,11 @@ class DataProtectionImpactAssessment(BaseModel): description="A boolean value determining if a data protection impact assessment is required. Defaults to False.", ) progress: Optional[str] = Field( + default=None, description="The optional status of a Data Protection Impact Assessment. Returned on an exported data map or RoPA.", ) - link: Optional[AnyUrl] = Field( + link: Optional[str] = Field( + default=None, description="The optional link to the Data Protection Impact Assessment. Returned on an exported data map or RoPA.", ) @@ -937,48 +897,57 @@ class PrivacyDeclaration(BaseModel): """ name: Optional[str] = Field( + default=None, description="The name of the privacy declaration on the system.", ) data_categories: List[FidesKey] = Field( + default=[], description="An array of data categories describing a system in a privacy declaration.", ) data_use: FidesKey = Field( description="The Data Use describing a system in a privacy declaration.", ) data_subjects: List[FidesKey] = Field( - default_factory=list, + default=[], description="An array of data subjects describing a system in a privacy declaration.", ) dataset_references: Optional[List[FidesKey]] = Field( + default=None, description="Referenced Dataset fides keys used by the system.", ) egress: Optional[List[FidesKey]] = Field( - description="The resources to which data is sent. Any `fides_key`s included in this list reference `DataFlow` entries in the `egress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + default=None, + description="The resources to which data is sent. Any `fides_key`s included in this list reference `DataFlow` entries in the `egress` array of any `System` resources to which this `PrivacyDeclaration` is applied.", ) ingress: Optional[List[FidesKey]] = Field( - description="The resources from which data is received. Any `fides_key`s included in this list reference `DataFlow` entries in the `ingress` array of any `System` resources to which this `PrivacyDeclaration` is applied." + default=None, + description="The resources from which data is received. Any `fides_key`s included in this list reference `DataFlow` entries in the `ingress` array of any `System` resources to which this `PrivacyDeclaration` is applied.", ) features: List[str] = Field( - default_factory=list, description="The features of processing personal data." + default=[], description="The features of processing personal data." ) flexible_legal_basis_for_processing: bool = Field( - description="Whether the legal basis for processing is 'flexible' (i.e. can be overridden in a privacy notice) for this declaration.", default=True, + description="Whether the legal basis for processing is 'flexible' (i.e. can be overridden in a privacy notice) for this declaration.", ) legal_basis_for_processing: Optional[LegalBasisForProcessingEnum] = Field( - description="The legal basis under which personal data is processed for this purpose." + default=None, + description="The legal basis under which personal data is processed for this purpose.", ) impact_assessment_location: Optional[str] = Field( - description="Where the legitimate interest impact assessment is stored" + default=None, + description="Where the legitimate interest impact assessment is stored", ) retention_period: Optional[str] = Field( - description="An optional string to describe the time period for which data is retained for this purpose." + default=None, + description="An optional string to describe the time period for which data is retained for this purpose.", ) processes_special_category_data: bool = Field( default=False, description="This system processes special category data", ) special_category_legal_basis: Optional[SpecialCategoryLegalBasisEnum] = Field( + default=None, description="The legal basis under which the special category data is processed.", ) data_shared_with_third_parties: bool = Field( @@ -986,20 +955,18 @@ class PrivacyDeclaration(BaseModel): description="This system shares data with third parties for this purpose.", ) third_parties: Optional[str] = Field( + default=None, description="The types of third parties the data is shared with.", ) shared_categories: List[str] = Field( - default_factory=list, + default=[], description="The categories of personal data that this system shares with third parties.", ) cookies: Optional[List[Cookies]] = Field( + default=[], description="Cookies associated with this data use to deliver services and functionality", ) - - class Config: - """Config for the Privacy Declaration""" - - orm_mode = True + model_config = ConfigDict(from_attributes=True) class SystemMetadata(BaseModel): @@ -1010,13 +977,16 @@ class SystemMetadata(BaseModel): """ resource_id: Optional[str] = Field( - description="The external resource id for the system being modeled." + default=None, + description="The external resource id for the system being modeled.", ) endpoint_address: Optional[str] = Field( - description="The host of the external resource for the system being modeled." + default=None, + description="The host of the external resource for the system being modeled.", ) endpoint_port: Optional[str] = Field( - description="The port of the external resource for the system being modeled." + default=None, + description="The port of the external resource for the system being modeled.", ) @@ -1046,25 +1016,25 @@ class DataFlow(BaseModel): description=f"Specifies the resource model class for which the `fides_key` applies. May be any of {', '.join([member.value for member in FlowableResources])}.", ) data_categories: Optional[List[FidesKey]] = Field( + default=None, description="An array of data categories describing the data in transit.", ) - @root_validator(skip_on_failure=True) - @classmethod - def user_special_case(cls, values: Dict) -> Dict: + @model_validator(mode="after") + def user_special_case(self) -> "DataFlow": """ If either the `fides_key` or the `type` are set to "user", then the other must also be set to "user". """ - if values["fides_key"] == "user" or values["type"] == "user": + if self.fides_key == "user" or self.type == "user": assert ( - values["fides_key"] == "user" and values["type"] == "user" + self.fides_key == "user" and self.type == "user" ), "The 'user' fides_key is required for, and requires, the type 'user'" - return values + return self - @validator("type") + @field_validator("type") @classmethod def verify_type_is_flowable(cls, value: str) -> str: """ @@ -1085,31 +1055,38 @@ class System(FidesModel): """ registry_id: Optional[int] = Field( + default=None, description="The id of the system registry, if used.", + strict=False, # This allows Pydantic to coerce '1' -> 1 ) meta: Optional[Dict] = meta_field fidesctl_meta: Optional[SystemMetadata] = Field( + default=None, description=SystemMetadata.__doc__, ) system_type: str = Field( description="A required value to describe the type of system being modeled, examples include: Service, Application, Third Party, etc.", ) data_responsibility_title: Optional[DataResponsibilityTitle] = Field( + default=None, description="Deprecated. The responsibility or role over the system that processes personal data", ) egress: Optional[List[DataFlow]] = Field( - description="The resources to which the system sends data." + default=None, description="The resources to which the system sends data." ) ingress: Optional[List[DataFlow]] = Field( - description="The resources from which the system receives data." + default=None, description="The resources from which the system receives data." ) privacy_declarations: List[PrivacyDeclaration] = Field( description=PrivacyDeclaration.__doc__, ) joint_controller: Optional[ContactDetails] = Field( - description="Deprecated. " + ContactDetails.__doc__, + default=None, + description="Deprecated. " + + (ContactDetails.__doc__ or ""), # The 'or' is to satisfy a type issue ) third_country_transfers: Optional[List[str]] = Field( + default=None, description="Deprecated. An optional array to identify any third countries where data is transited to. For consistency purposes, these fields are required to follow the Alpha-3 code set in ISO 3166-1.", ) administrating_department: Optional[str] = Field( @@ -1117,16 +1094,22 @@ class System(FidesModel): description="An optional value to identify the owning department or group of the system within your organization", ) data_protection_impact_assessment: Optional[DataProtectionImpactAssessment] = Field( - description="Deprecated. " + DataProtectionImpactAssessment.__doc__, + default=None, + description="Deprecated. " + + ( + DataProtectionImpactAssessment.__doc__ or "" + ), # The 'or' is to satisfy a type issue ) vendor_id: Optional[str] = Field( - description="The unique identifier for the vendor that's associated with this system." + default=None, + description="The unique identifier for the vendor that's associated with this system.", ) previous_vendor_id: Optional[str] = Field( - description="If specified, the unique identifier for the vendor that was previously associated with this system." + default=None, + description="If specified, the unique identifier for the vendor that was previously associated with this system.", ) - dataset_references: List[FidesKey] = Field( - default_factory=list, + dataset_references: Optional[List[FidesKey]] = Field( + default=[], description="Referenced Dataset fides keys used by the system.", ) processes_personal_data: bool = Field( @@ -1138,14 +1121,15 @@ class System(FidesModel): description="This toggle indicates whether the system is exempt from privacy regulation if they do process personal data.", ) reason_for_exemption: Optional[str] = Field( - description="The reason that the system is exempt from privacy regulation." + default=None, + description="The reason that the system is exempt from privacy regulation.", ) uses_profiling: bool = Field( default=False, description="Whether the vendor uses data to profile a consumer in a way that has a legal effect.", ) legal_basis_for_profiling: List[LegalBasisForProfilingEnum] = Field( - default_factory=list, + default=[], description="The legal basis (or bases) for performing profiling that has a legal effect.", ) does_international_transfers: bool = Field( @@ -1161,35 +1145,41 @@ class System(FidesModel): description="Whether this system requires data protection impact assessments.", ) dpa_location: Optional[str] = Field( - description="Location where the DPAs or DIPAs can be found." + default=None, description="Location where the DPAs or DIPAs can be found." ) dpa_progress: Optional[str] = Field( - description="The optional status of a Data Protection Impact Assessment" + default=None, + description="The optional status of a Data Protection Impact Assessment", ) - privacy_policy: Optional[AnyUrl] = Field( - description="A URL that points to the system's publicly accessible privacy policy." + privacy_policy: Optional[str] = Field( + default=None, + description="A URL that points to the System's publicly accessible privacy policy.", ) legal_name: Optional[str] = Field( - description="The legal name for the business represented by the system." + default=None, + description="The legal name for the business represented by the system.", ) legal_address: Optional[str] = Field( - description="The legal address for the business represented by the system." + default=None, + description="The legal address for the business represented by the system.", ) responsibility: List[DataResponsibilityTitle] = Field( - default_factory=list, + default=[], description=DataResponsibilityTitle.__doc__, ) dpo: Optional[str] = Field( - description="The official privacy contact address or DPO." + default=None, description="The official privacy contact address or DPO." ) joint_controller_info: Optional[str] = Field( - description="The party or parties that share the responsibility for processing personal data." + default=None, + description="The party or parties that share the responsibility for processing personal data.", ) # Use joint_controller_info in favor of joint_controller data_security_practices: Optional[str] = Field( - description="The data security practices employed by this system." + default=None, description="The data security practices employed by this system." ) cookie_max_age_seconds: Optional[int] = Field( - description="The maximum storage duration, in seconds, for cookies used by this system." + default=None, + description="The maximum storage duration, in seconds, for cookies used by this system.", ) uses_cookies: bool = Field( default=False, description="Whether this system uses cookie storage." @@ -1202,71 +1192,53 @@ class System(FidesModel): default=False, description="Whether the system uses non-cookie methods of storage or accessing information stored on a user's device.", ) - legitimate_interest_disclosure_url: Optional[AnyUrl] = Field( - description="A URL that points to the system's publicly accessible legitimate interest disclosure." + legitimate_interest_disclosure_url: Optional[str] = Field( + default=None, + description="A URL that points to the system's publicly accessible legitimate interest disclosure.", ) cookies: Optional[List[Cookies]] = Field( + default=None, description="System-level cookies unassociated with a data use to deliver services and functionality", ) - _sort_privacy_declarations: classmethod = validator( - "privacy_declarations", allow_reuse=True - )(sort_list_objects_by_name) + _sort_privacy_declarations = field_validator("privacy_declarations")( + sort_list_objects_by_name + ) - _check_valid_country_code: classmethod = country_code_validator + _check_valid_country_code = country_code_validator - @root_validator - @classmethod - def deprecate_fields(cls, values: Dict) -> Dict: - """ - Warn of System fields pending deprecation. - """ - deprecated_fields = [ - "joint_controller", - "third_country_transfers", - "data_responsibility_title", - "data_protection_impact_assessment", - ] - for field in deprecated_fields: - if values.get(field) is not None: - warn( - f"The {field} field is deprecated, and will be removed in a future version of fideslang.", - DeprecationWarning, - ) - return values - - @validator("privacy_declarations", each_item=True) - @classmethod - def privacy_declarations_reference_data_flows( - cls, - value: PrivacyDeclaration, - values: Dict, - ) -> PrivacyDeclaration: + @model_validator(mode="after") + def verify_privacy_declarations(self) -> "System": """ Any `PrivacyDeclaration`s which include `egress` and/or `ingress` fields must only reference the `fides_key`s of defined `DataFlow`s in said field(s). """ - for direction in ["egress", "ingress"]: - fides_keys = getattr(value, direction, None) - if fides_keys is not None: - data_flows = values[direction] - system = values["fides_key"] - assert ( - data_flows is not None and len(data_flows) > 0 - ), f"PrivacyDeclaration '{value.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." - - for fides_key in fides_keys: - assert fides_key in [ - data_flow.fides_key for data_flow in data_flows - ], f"PrivacyDeclaration '{value.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." - - return value - - class Config: - """Class for the System config""" - - use_enum_values = True + if self.privacy_declarations: + for ( + declaration + ) in self.privacy_declarations: # pylint: disable=not-an-iterable + for direction in ["egress", "ingress"]: + flow_fides_keys = getattr(declaration, direction, None) + if flow_fides_keys is not None: + data_flows = getattr(self, direction) + system = self.fides_key + + assert ( + data_flows is not None and len(data_flows) > 0 + ), f"PrivacyDeclaration '{declaration.name}' defines {direction} with one or more resources and is applied to the System '{system}', which does not itself define any {direction}." + + for ( + fides_key + ) in flow_fides_keys: # pylint: disable=not-an-iterable + assert fides_key in [ + data_flow.fides_key + for data_flow in data_flows # pylint: disable=not-an-iterable + ], f"PrivacyDeclaration '{declaration.name}' defines {direction} with '{fides_key}' and is applied to the System '{system}', which does not itself define {direction} with that resource." + + return self + + model_config = ConfigDict(use_enum_values=True) # Taxonomy diff --git a/src/fideslang/parse.py b/src/fideslang/parse.py index 94462d94..9c1a25c5 100644 --- a/src/fideslang/parse.py +++ b/src/fideslang/parse.py @@ -19,7 +19,7 @@ def parse_dict( raise SystemExit(1) try: - parsed_manifest = model_map[resource_type].parse_obj(resource) + parsed_manifest = model_map[resource_type].model_validate(resource) except Exception as err: print( "Failed to parse {} from {}:\n{}".format( @@ -34,7 +34,7 @@ def load_manifests_into_taxonomy(raw_manifests: Dict[str, List[Dict]]) -> Taxono """ Parse the raw resource manifests into resource resources. """ - taxonomy = Taxonomy.parse_obj( + taxonomy = Taxonomy.model_validate( { resource_type: [ parse_dict(resource_type, resource) for resource in resource_list diff --git a/src/fideslang/relationships.py b/src/fideslang/relationships.py index b238a226..8840bbdd 100644 --- a/src/fideslang/relationships.py +++ b/src/fideslang/relationships.py @@ -75,7 +75,7 @@ def get_referenced_missing_keys(taxonomy: Taxonomy) -> Set[FidesKey]: """ referenced_keys: List[Set[FidesKey]] = [ find_referenced_fides_keys(resource) - for resource_type in taxonomy.__fields_set__ + for resource_type in taxonomy.model_fields_set for resource in getattr(taxonomy, resource_type) ] key_set: Set[FidesKey] = set( diff --git a/src/fideslang/utils.py b/src/fideslang/utils.py index 5b64dbcb..e2c490bc 100644 --- a/src/fideslang/utils.py +++ b/src/fideslang/utils.py @@ -16,7 +16,7 @@ def get_resource_by_fides_key( return { resource_type: resource - for resource_type in taxonomy.__fields_set__ + for resource_type in taxonomy.model_fields_set for resource in getattr(taxonomy, resource_type) if resource.fides_key == fides_key } or None diff --git a/src/fideslang/validation.py b/src/fideslang/validation.py index b7f4a3d6..b3ca93bf 100644 --- a/src/fideslang/validation.py +++ b/src/fideslang/validation.py @@ -3,50 +3,54 @@ """ import re from collections import Counter -from typing import Dict, Generator, List, Optional, Pattern, Set, Tuple +from typing import Annotated, Dict, List, Optional, Pattern, Set, Tuple from packaging.version import Version -from pydantic import ConstrainedStr +from pydantic.functional_validators import BeforeValidator from fideslang.default_fixtures import COUNTRY_CODES VALID_COUNTRY_CODES = [country["alpha3Code"] for country in COUNTRY_CODES] +FIDES_KEY_PATTERN = r"^[a-zA-Z0-9_.<>-]+$" class FidesValidationError(ValueError): """Custom exception for when the pydantic ValidationError can't be used.""" -class FidesVersion(Version): - """Validate strings as proper semantic versions.""" +def validate_fides_key(value: str) -> str: + """Throws ValueError if val is not a valid FidesKey""" - @classmethod - def __get_validators__(cls) -> Generator: - yield cls.validate + regex: Pattern[str] = re.compile(FIDES_KEY_PATTERN) + if not regex.match(value): + raise FidesValidationError( + f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}" + ) - @classmethod - def validate(cls, value: str) -> Version: - """Validates that the provided string is a valid Semantic Version.""" - return Version(value) + return value -class FidesKey(ConstrainedStr): - """ - A FidesKey type that creates a custom constrained string. - """ +FidesKey = Annotated[str, BeforeValidator(validate_fides_key)] - regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.<>-]+$") - @classmethod # This overrides the default method to throw the custom FidesValidationError - def validate(cls, value: str) -> str: - """Throws ValueError if val is not a valid FidesKey""" +def validate_collection_key_parts(value: str) -> str: + """ + Overrides validation to check FidesCollectionKey format, and that both the dataset + and collection names have the FidesKey format. + """ + values = value.split(".") + if len(values) == 2: + validate_fides_key(values[0]) + validate_fides_key(values[1]) + else: + raise ValueError( + "FidesCollection must be specified in the form 'FidesKey.FidesKey'" + ) + return value - if not cls.regex.match(value): - raise FidesValidationError( - f"FidesKeys must only contain alphanumeric characters, '.', '_', '<', '>' or '-'. Value provided: {value}" - ) - return value +# Dataset.Collection name where both dataset and collection names are valid FidesKeys +FidesCollectionKey = Annotated[str, BeforeValidator(validate_collection_key_parts)] def sort_list_objects_by_name(values: List) -> List: @@ -77,45 +81,34 @@ def unique_items_in_list(values: List) -> List: return values -def no_self_reference(value: FidesKey, values: Dict) -> FidesKey: - """ - Check to make sure that the fides_key doesn't match other fides_key - references within an object. - - i.e. DataCategory.parent_key != DataCategory.fides_key - """ - fides_key = FidesKey.validate(values.get("fides_key", "")) - if value == fides_key: - raise FidesValidationError("FidesKey can not self-reference!") - return value - - def deprecated_version_later_than_added( - version_deprecated: Optional[FidesVersion], values: Dict -) -> Optional[FidesVersion]: + version_deprecated: Version, version_added: Optional[str] +) -> None: """ Check to make sure that the deprecated version is later than the added version. This will also catch errors where the deprecated version is defined but the added version is empty. """ + parsed_version_added = Version(version_added) if version_added else Version("0") - if not version_deprecated: - return None - - if version_deprecated < values.get("version_added", Version("0")): + if version_deprecated < parsed_version_added: raise FidesValidationError( "Deprecated version number can't be earlier than version added!" ) - if version_deprecated == values.get("version_added", Version("0")): + if version_deprecated == parsed_version_added: raise FidesValidationError( "Deprecated version number can't be the same as the version added!" ) - return version_deprecated -def has_versioning_if_default(is_default: bool, values: Dict) -> bool: +def has_versioning_if_default( + is_default: bool, + version_added: Optional[str], + version_deprecated: Optional[str], + replaced_by: Optional[str], +) -> None: """ Check to make sure that version fields are set for default items. """ @@ -123,45 +116,42 @@ def has_versioning_if_default(is_default: bool, values: Dict) -> bool: # If it's a default item, it at least needs a starting version if is_default: try: - assert values.get("version_added") + assert version_added except AssertionError: raise FidesValidationError("Default items must have version information!") # If it's not default, it shouldn't have version info else: try: - assert not values.get("version_added") - assert not values.get("version_deprecated") - assert not values.get("replaced_by") + assert not version_added + assert not version_deprecated + assert not replaced_by except AssertionError: raise FidesValidationError( "Non-default items can't have version information!" ) - return is_default - -def is_deprecated_if_replaced(replaced_by: str, values: Dict) -> str: +def no_self_reference(parent_key: Optional[str], fides_key: str) -> None: """ - Check to make sure that the item has been deprecated if there is a replacement. - """ - - if replaced_by and not values.get("version_deprecated"): - raise FidesValidationError("Cannot be replaced without deprecation!") + Check to make sure that the fides_key doesn't match other fides_key + references within an object. - return replaced_by + i.e. DataCategory.parent_key != DataCategory.fides_key + """ + if parent_key == fides_key: + raise FidesValidationError("FidesKey can not self-reference!") -def matching_parent_key(parent_key: FidesKey, values: Dict) -> FidesKey: +def matching_parent_key(parent_key: Optional[str], fides_key: str) -> None: """ Confirm that the parent_key matches the parent parsed from the FidesKey. """ - fides_key = FidesKey.validate(values.get("fides_key", "")) - split_fides_key = fides_key.split(".") + split_fides_key = str(fides_key).split(".") # Check if it is a top-level resource if len(split_fides_key) == 1 and not parent_key: - return parent_key + return # Reform the parent_key from the fides_key and compare parent_key_from_fides_key = ".".join(split_fides_key[:-1]) @@ -171,7 +161,6 @@ def matching_parent_key(parent_key: FidesKey, values: Dict) -> FidesKey: parent_key, parent_key_from_fides_key, fides_key ) ) - return parent_key def check_valid_country_code(country_code_list: List) -> List: diff --git a/tests/conftest.py b/tests/conftest.py index 89a099f5..f621201c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,14 +16,14 @@ def resources_dict(): """ resources_dict: Dict[str, Any] = { "data_category": models.DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="user.custom", parent_key="user", name="Custom Data Category", description="Custom Data Category", ), "dataset": models.Dataset( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_sample_db_dataset", name="Sample DB Dataset", description="This is a Sample Database Dataset", @@ -53,13 +53,13 @@ def resources_dict(): ], ), "data_subject": models.DataSubject( - organization_fides_key=1, + organization_fides_key="1", fides_key="custom_subject", name="Custom Data Subject", description="Custom Data Subject", ), "data_use": models.DataUse( - organization_fides_key=1, + organization_fides_key="1", fides_key="custom_data_use", name="Custom Data Use", description="Custom Data Use", @@ -73,7 +73,7 @@ def resources_dict(): description="Test Organization", ), "policy": models.Policy( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_policy", name="Test Policy", version="1.3", @@ -87,15 +87,15 @@ def resources_dict(): data_subjects=models.PrivacyRule(matches="ANY", values=[]), ), "registry": models.Registry( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_registry", name="Test Registry", description="Test Regsitry", systems=[], ), "system": models.System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registry_id="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -120,7 +120,7 @@ def test_manifests(): "dataset": [ { "name": "Test Dataset 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "description": "Test Dataset 1", @@ -131,7 +131,7 @@ def test_manifests(): "system": [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", @@ -143,7 +143,7 @@ def test_manifests(): { "name": "Test Dataset 2", "description": "Test Dataset 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "fides_key": "another_dataset", @@ -153,7 +153,7 @@ def test_manifests(): "system": [ { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", diff --git a/tests/fideslang/gvl/test_gvl.py b/tests/fideslang/gvl/test_gvl.py index abda6fd2..118ba492 100644 --- a/tests/fideslang/gvl/test_gvl.py +++ b/tests/fideslang/gvl/test_gvl.py @@ -1,14 +1,9 @@ import pytest -from fideslang.gvl import ( - GVL_FEATURES, - GVL_SPECIAL_FEATURES, - Feature, - data_category_id_to_data_categories, - feature_id_to_feature_name, - feature_name_to_feature, - purpose_to_data_use, -) +from fideslang.gvl import (GVL_FEATURES, GVL_SPECIAL_FEATURES, Feature, + data_category_id_to_data_categories, + feature_id_to_feature_name, feature_name_to_feature, + purpose_to_data_use) def test_purpose_to_data_use(): diff --git a/tests/fideslang/test_default_taxonomy.py b/tests/fideslang/test_default_taxonomy.py index d04f8e9b..be9075b3 100644 --- a/tests/fideslang/test_default_taxonomy.py +++ b/tests/fideslang/test_default_taxonomy.py @@ -25,6 +25,12 @@ def test_taxonomy_count(self, type_and_count: Tuple[str, int]) -> None: def test_are_set_as_default(self, data_type: str) -> None: assert all([x.is_default for x in getattr(DEFAULT_TAXONOMY, data_type)]) + @pytest.mark.parametrize("data_type", taxonomy_counts.keys()) + def test_valid_json_schemas(self, data_type: str) -> None: + # If this fails, an error will get thrown + getattr(DEFAULT_TAXONOMY, data_type)[0].model_json_schema() + assert True + @pytest.mark.parametrize("data_type", taxonomy_counts.keys()) def test_key_uniqueness(self, data_type: str) -> None: keys = [x.fides_key for x in getattr(DEFAULT_TAXONOMY, data_type)] diff --git a/tests/fideslang/test_manifests.py b/tests/fideslang/test_manifests.py index 5310624c..9fab7e04 100644 --- a/tests/fideslang/test_manifests.py +++ b/tests/fideslang/test_manifests.py @@ -68,7 +68,7 @@ def test_union_manifests(test_manifests): "name": "Test Dataset 1", "description": "Test Dataset 1", "fides_key": "some_dataset", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "datasetTables": [], @@ -77,7 +77,7 @@ def test_union_manifests(test_manifests): "name": "Test Dataset 2", "description": "Test Dataset 2", "fides_key": "another_dataset", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "datasetTables": [], @@ -86,14 +86,14 @@ def test_union_manifests(test_manifests): "system": [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", }, { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", @@ -122,7 +122,7 @@ def test_ingest_manifests(ingestion_manifest_directory): assert sorted(actual_result["dataset"], key=lambda x: x["name"]) == [ { "name": "Test Dataset 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "description": "Test Dataset 1", @@ -132,7 +132,7 @@ def test_ingest_manifests(ingestion_manifest_directory): { "name": "Test Dataset 2", "description": "Test Dataset 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "datasetType": {}, "datasetLocation": "somedb:3306", "fides_key": "another_dataset", @@ -142,14 +142,14 @@ def test_ingest_manifests(ingestion_manifest_directory): assert sorted(actual_result["system"], key=lambda x: x["name"]) == [ { "name": "Test System 1", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 1", "fides_key": "some_system", }, { "name": "Test System 2", - "organization_fides_key": 1, + "organization_fides_key": "1", "systemType": "mysql", "description": "Test System 2", "fides_key": "another_system", diff --git a/tests/fideslang/test_models.py b/tests/fideslang/test_models.py index 45f3218d..1040ed05 100644 --- a/tests/fideslang/test_models.py +++ b/tests/fideslang/test_models.py @@ -1,13 +1,9 @@ -from pytest import deprecated_call, mark, raises +from pytest import mark, raises -from fideslang import DataFlow, Dataset, Organization, PrivacyDeclaration, System -from fideslang.models import ( - ContactDetails, - DataResponsibilityTitle, - DatasetCollection, - DatasetField, - DataUse, -) +from fideslang.models import (ContactDetails, DataFlow, + DataResponsibilityTitle, Dataset, + DatasetCollection, DatasetField, DataUse, + Organization, PrivacyDeclaration, System) pytestmark = mark.unit @@ -68,11 +64,7 @@ def test_dataflow_invalid_type(self) -> None: class TestPrivacyDeclaration: def test_privacydeclaration_valid(self) -> None: assert PrivacyDeclaration( - data_categories=[], - data_subjects=[], data_use="provide", - egress=[], - ingress=[], name="declaration-name", ) @@ -102,7 +94,7 @@ def test_system_valid(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", cookies=[{"name": "test_cookie"}], privacy_declarations=[ PrivacyDeclaration( @@ -117,7 +109,7 @@ def test_system_valid(self) -> None: ], ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -153,7 +145,7 @@ def test_system_valid_nested_meta(self) -> None: }, }, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -164,7 +156,7 @@ def test_system_valid_nested_meta(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -189,7 +181,7 @@ def test_system_valid_no_meta(self) -> None: ], # purposefully omitting the `meta` property to ensure it's effectively optional name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -207,12 +199,16 @@ def test_system_valid_no_meta(self) -> None: assert system.meta == None def test_system_valid_no_egress_or_ingress(self) -> None: + """ + If there are no ingress/egress at the System level, as well as + none at the PrivacyDeclaration level, it is valid. + """ assert System( description="Test Policy", fides_key="test_system", meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -227,6 +223,10 @@ def test_system_valid_no_egress_or_ingress(self) -> None: ) def test_system_no_egress(self) -> None: + """ + If there is an ingress or egress at the PrivacyDeclaration level that + isn't at the system level, we should get a validation error. + """ with raises(ValueError): assert System( description="Test Policy", @@ -240,7 +240,7 @@ def test_system_no_egress(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -257,6 +257,10 @@ def test_system_no_egress(self) -> None: ) def test_system_no_ingress(self) -> None: + """ + If there is an ingress or egress at the PrivacyDeclaration level that + isn't at the system level, we should get a validation error. + """ with raises(ValueError): assert System( description="Test Policy", @@ -270,7 +274,7 @@ def test_system_no_ingress(self) -> None: fides_key="test_system", meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -299,7 +303,7 @@ def test_system_user_ingress_valid(self) -> None: ], meta={"some": "meta stuff"}, name="Test System", - organization_fides_key=1, + organization_fides_key="1", privacy_declarations=[ PrivacyDeclaration( data_categories=[], @@ -309,7 +313,7 @@ def test_system_user_ingress_valid(self) -> None: name="declaration-name", ) ], - registry_id=1, + registry_id="1", system_type="SYSTEM", tags=["some", "tags"], ) @@ -317,11 +321,11 @@ def test_system_user_ingress_valid(self) -> None: def test_expanded_system(self): assert System( fides_key="test_system", - organization_fides_key=1, + organization_fides_key="1", tags=["some", "tags"], name="Exponential Interactive, Inc d/b/a VDX.tv", description="My system test", - registry_id=1, + registry_id="1", meta={"some": "meta stuff"}, system_type="SYSTEM", egress=[ @@ -415,7 +419,6 @@ def test_expanded_system(self): def test_flexible_legal_basis_default(self): pd = PrivacyDeclaration( data_categories=[], - data_qualifier="aggregated_data", data_subjects=[], data_use="provide", ingress=["user"], @@ -423,44 +426,6 @@ def test_flexible_legal_basis_default(self): ) assert pd.flexible_legal_basis_for_processing - @mark.parametrize( - "deprecated_field,value", - [ - ("data_responsibility_title", "Controller"), - ( - "joint_controller", - { - "name": "Jane Doe", - "address": "104 Test Lane; Test Town, TX, 32522", - "email": "jane@example.com", - "phone": "345-255-2555", - }, - ), - ("third_country_transfers", ["GBR"]), - ( - "data_protection_impact_assessment", - { - "is_required": True, - "progress": "pending", - "link": "https://www.example.com/dpia", - }, - ), - ], - ) - def test_system_deprecated_fields(self, deprecated_field, value) -> None: - with deprecated_call(match=deprecated_field): - assert System( - **{ - "description": "Test System", - "fides_key": "test_system", - "name": "Test System", - "registry": 1, - "system_type": "SYSTEM", - "privacy_declarations": [], - deprecated_field: value, - } - ) - class TestDataset: def test_valid_dataset(self): @@ -522,24 +487,6 @@ def test_valid_dataset(self): ], ) - @mark.parametrize( - "deprecated_field,value", - [ - ("joint_controller", {"name": "Controller_name"}), - ("retention", "90 days"), - ("third_country_transfers", ["IRL"]), - ], - ) - def test_dataset_deprecated_fields(self, deprecated_field, value) -> None: - with deprecated_call(match=deprecated_field): - assert Dataset( - **{ - "fides_key": "test_dataset", - "collections": [], - deprecated_field: value, - } - ) - def test_dataset_collection_skip_processing(self): collection = DatasetCollection( name="dataset_collection_1", @@ -570,17 +517,3 @@ def test_dataset_collection_skip_processing(self): class TestDataUse: def test_minimal_data_use(self): assert DataUse(fides_key="new_use") - - @mark.parametrize( - "deprecated_field,value", - [ - ("legal_basis", "Legal Obligation"), - ("special_category", "Substantial Public Interest"), - ("recipients", ["Advertising Bureau"]), - ("legitimate_interest", False), - ("legitimate_interest_impact_assessment", "https://www.example.com"), - ], - ) - def test_datause_deprecated_fields(self, deprecated_field, value) -> None: - with deprecated_call(match=deprecated_field): - assert DataUse(**{"fides_key": "new_use", deprecated_field: value}) diff --git a/tests/fideslang/test_parse.py b/tests/fideslang/test_parse.py index b94e752b..5c90cd74 100644 --- a/tests/fideslang/test_parse.py +++ b/tests/fideslang/test_parse.py @@ -1,19 +1,18 @@ import pytest -from fideslang import models -from fideslang import parse +from fideslang import models, parse @pytest.mark.unit def test_parse_manifest(): expected_result = models.DataCategory( - organization_fides_key=1, + organization_fides_key="1", fides_key="some_resource", name="Test resource 1", description="Test Description", ) test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "some_resource", "name": "Test resource 1", "description": "Test Description", @@ -26,7 +25,7 @@ def test_parse_manifest(): def test_parse_manifest_no_fides_key_validation_error(): with pytest.raises(SystemExit): test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "name": "Test resource 1", "description": "Test Description", } @@ -38,7 +37,7 @@ def test_parse_manifest_no_fides_key_validation_error(): def test_parse_manifest_resource_type_error(): with pytest.raises(SystemExit): test_dict = { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "some_resource", "name": "Test resource 1", "description": "Test Description", diff --git a/tests/fideslang/test_relationships.py b/tests/fideslang/test_relationships.py index 54ca20ac..4a409147 100644 --- a/tests/fideslang/test_relationships.py +++ b/tests/fideslang/test_relationships.py @@ -1,23 +1,11 @@ import pytest from fideslang import relationships -from fideslang.models import ( - ContactDetails, - DataCategory, - DataFlow, - DataProtectionImpactAssessment, - Dataset, - DatasetCollection, - DatasetField, - DataUse, - MatchesEnum, - Organization, - Policy, - PolicyRule, - PrivacyDeclaration, - System, - Taxonomy, -) +from fideslang.models import (ContactDetails, DataCategory, DataFlow, + DataProtectionImpactAssessment, Dataset, + DatasetCollection, DatasetField, DataUse, + MatchesEnum, Organization, Policy, PolicyRule, + PrivacyDeclaration, System, Taxonomy) @pytest.mark.unit @@ -108,7 +96,7 @@ def test_find_referenced_fides_keys_1(self) -> None: assert referenced_keys == set(expected_referenced_key) def test_find_referenced_fides_keys_2(self) -> None: - test_system = System.construct( + test_system = System.model_construct( name="test_dc", fides_key="test_dc", description="test description", @@ -158,7 +146,7 @@ def test_get_referenced_missing_keys(self): ), ], system=[ - System.construct( + System.model_construct( name="test_system", fides_key="test_system", description="test description", diff --git a/tests/fideslang/test_validation.py b/tests/fideslang/test_validation.py index 7f87d141..cfe3459f 100644 --- a/tests/fideslang/test_validation.py +++ b/tests/fideslang/test_validation.py @@ -1,28 +1,13 @@ import pytest from pydantic import ValidationError -from fideslang.models import ( - CollectionMeta, - DataCategory, - DataFlow, - Dataset, - DataUse, - DataSubject, - DatasetCollection, - DatasetField, - DatasetMetadata, - DataUse, - FidesCollectionKey, - FidesDatasetReference, - FidesMeta, - FidesModel, - Policy, - PolicyRule, - PrivacyDeclaration, - PrivacyRule, - System, -) -from fideslang.validation import FidesKey, FidesValidationError, valid_data_type +from fideslang.models import (CollectionMeta, DataCategory, DataFlow, Dataset, + DatasetCollection, DatasetField, DatasetMetadata, + DataSubject, DataUse, FidesDatasetReference, + FidesMeta, FidesModel, Policy, PolicyRule, + PrivacyDeclaration, PrivacyRule, System) +from fideslang.validation import (FidesValidationError, valid_data_type, + validate_fides_key) DEFAULT_TAXONOMY_CLASSES = [DataCategory, DataUse, DataSubject] @@ -36,7 +21,7 @@ def test_default_no_versions_error(self, TaxonomyClass): """There should be version info for default items.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -48,7 +33,7 @@ def test_not_default_no_versions_error(self, TaxonomyClass): """There shouldn't be version info on a non-default item.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -60,7 +45,7 @@ def test_deprecated_when_added(self, TaxonomyClass): """Item can't be deprecated in a version earlier than it was added.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -74,7 +59,7 @@ def test_deprecated_after_added(self, TaxonomyClass): """Item can't be deprecated in a version earlier than it was added.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -86,9 +71,9 @@ def test_deprecated_after_added(self, TaxonomyClass): @pytest.mark.parametrize("TaxonomyClass", DEFAULT_TAXONOMY_CLASSES) def test_built_from_dict_with_empty_versions(self, TaxonomyClass) -> None: """Try building from a dictionary with explicit None values.""" - TaxonomyClass.parse_obj( + TaxonomyClass.model_validate( { - "organization_fides_key": 1, + "organization_fides_key": "1", "fides_key": "user", "name": "Custom Test Data", "description": "Custom Test Data Category", @@ -103,7 +88,7 @@ def test_built_from_dict_with_empty_versions(self, TaxonomyClass) -> None: def test_built_with_empty_versions(self, TaxonomyClass) -> None: """Try building directly with explicit None values.""" TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -118,7 +103,7 @@ def test_deprecated_not_added(self, TaxonomyClass): """Can't be deprecated without being added in an earlier version.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -131,7 +116,7 @@ def test_replaced_not_deprecated(self, TaxonomyClass): """If the field is replaced, it must also be deprecated.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -144,7 +129,7 @@ def test_replaced_not_deprecated(self, TaxonomyClass): def test_replaced_and_deprecated(self, TaxonomyClass): """If the field is replaced, it must also be deprecated.""" assert TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -159,7 +144,7 @@ def test_version_error(self, TaxonomyClass): """Check that versions are validated.""" with pytest.raises(ValidationError): TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -171,7 +156,7 @@ def test_version_error(self, TaxonomyClass): def test_versions_valid(self, TaxonomyClass): """Check that versions are validated.""" assert TaxonomyClass( - organization_fides_key=1, + organization_fides_key="1", fides_key="user", name="Custom Test Data", description="Custom Test Data Category", @@ -189,7 +174,7 @@ def test_collections_duplicate_fields_error(): data_categories=[], fields=[ DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), @@ -199,7 +184,7 @@ def test_collections_duplicate_fields_error(): data_categories=[], ), DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), @@ -222,7 +207,7 @@ def test_dataset_duplicate_collections_error(): data_categories=[], fields=[ DatasetField( - name=1, + name="1", description="Fides Generated Description for Column: 1", data_categories=[], ), @@ -245,139 +230,120 @@ def test_dataset_duplicate_collections_error(): @pytest.mark.unit -def test_top_level_resource(): - DataCategory( - organization_fides_key=1, - fides_key="user", - name="Custom Test Data", - description="Custom Test Data Category", - ) - assert DataCategory - - -@pytest.mark.unit -def test_fides_key_doesnt_match_stated_parent_key(): - with pytest.raises(ValidationError): +class TestFidesKeyValidation: + def test_top_level_resource(self): DataCategory( - organization_fides_key=1, - fides_key="user.custom_test_data", + organization_fides_key="1", + fides_key="user", name="Custom Test Data", description="Custom Test Data Category", - parent_key="user.account", ) - assert DataCategory - - -@pytest.mark.unit -def test_fides_key_matches_stated_parent_key(): - DataCategory( - organization_fides_key=1, - fides_key="user.account.custom_test_data", - name="Custom Test Data", - description="Custom Test Data Category", - parent_key="user.account", - ) - assert DataCategory + assert DataCategory + def test_fides_key_doesnt_match_stated_parent_key(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="user.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + parent_key="user.account", + ) + assert DataCategory -@pytest.mark.unit -def test_no_parent_key_but_fides_key_contains_parent_key(): - with pytest.raises(ValidationError): + def test_fides_key_matches_stated_parent_key(self): DataCategory( - organization_fides_key=1, - fides_key="user.custom_test_data", + organization_fides_key="1", + fides_key="user.account.custom_test_data", name="Custom Test Data", description="Custom Test Data Category", + parent_key="user.account", ) - assert DataCategory - - -@pytest.mark.unit -def test_fides_key_with_carets(): - DataCategory( - organization_fides_key=1, - fides_key="", - name="Example valid key with brackets", - description="This key contains a <> which is valid", - ) - assert DataCategory + assert DataCategory + def test_no_parent_key_but_fides_key_contains_parent_key(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="user.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", + ) + assert DataCategory -@pytest.mark.unit -def test_invalid_chars_in_fides_key(): - with pytest.raises(ValidationError): + def test_fides_key_with_carets(self): DataCategory( - organization_fides_key=1, - fides_key="!", - name="Example invalid key", - description="This key contains a ! so it is invalid", + organization_fides_key="1", + fides_key="", + name="Example valid key with brackets", + description="This key contains a <> which is valid", ) - assert DataCategory - - -@pytest.mark.unit -def test_create_valid_data_category(): - DataCategory( - organization_fides_key=1, - fides_key="user.custom_test_data", - name="Custom Test Data", - description="Custom Test Data Category", - parent_key="user", - ) - assert DataCategory + assert DataCategory + def test_invalid_chars_in_fides_key(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="!", + name="Example invalid key", + description="This key contains a ! so it is invalid", + ) + assert DataCategory -@pytest.mark.unit -def test_circular_dependency_data_category(): - with pytest.raises(ValidationError): + def test_create_valid_data_category(self): DataCategory( - organization_fides_key=1, - fides_key="user", - name="User Data", - description="Test Data Category", + organization_fides_key="1", + fides_key="user.custom_test_data", + name="Custom Test Data", + description="Custom Test Data Category", parent_key="user", ) - assert True - - -@pytest.mark.unit -def test_create_valid_data_use(): - DataUse( - organization_fides_key=1, - fides_key="provide.service", - name="Provide the Product or Service", - parent_key="provide", - description="Test Data Use", - ) - assert True + assert DataCategory + def test_circular_dependency_data_category(self): + with pytest.raises(ValidationError): + DataCategory( + organization_fides_key="1", + fides_key="user", + name="User Data", + description="Test Data Category", + parent_key="user", + ) + assert True -@pytest.mark.unit -def test_circular_dependency_data_use(): - with pytest.raises(ValidationError): + def test_create_valid_data_use(self): DataUse( - organization_fides_key=1, + organization_fides_key="1", fides_key="provide.service", name="Provide the Product or Service", + parent_key="provide", description="Test Data Use", - parent_key="provide.service", ) - assert True - + assert True -@pytest.mark.unit -@pytest.mark.parametrize("fides_key", ["foo_bar", "foo-bar", "foo.bar", "foo_bar_8"]) -def test_fides_model_valid(fides_key: str): - fides_key = FidesModel(fides_key=fides_key, name="Foo Bar") - assert fides_key + def test_circular_dependency_data_use(self): + with pytest.raises(ValidationError): + DataUse( + organization_fides_key="1", + fides_key="provide.service", + name="Provide the Product or Service", + description="Test Data Use", + parent_key="provide.service", + ) + assert True + @pytest.mark.parametrize( + "fides_key", ["foo_bar", "foo-bar", "foo.bar", "foo_bar_8"] + ) + def test_fides_model_valid(self, fides_key: str): + fides_model = FidesModel(fides_key=fides_key, name="Foo Bar") + assert fides_model -@pytest.mark.unit -@pytest.mark.parametrize("fides_key", ["foo/bar", "foo%bar", "foo^bar"]) -def test_fides_model_fides_key_invalid(fides_key): - """Check for a bunch of different possible bad characters here.""" - with pytest.raises(ValidationError): - FidesModel(fides_key=fides_key) + @pytest.mark.parametrize("fides_key", ["foo/bar", "foo%bar", "foo^bar"]) + def test_fides_model_fides_key_invalid(self, fides_key: str): + """Check for a bunch of different possible bad characters here.""" + with pytest.raises(ValidationError): + FidesModel(fides_key=fides_key) @pytest.mark.unit @@ -403,8 +369,8 @@ def test_invalid_matches_privacy_rule(): @pytest.mark.unit def test_valid_policy_rule(): assert PolicyRule( - organization_fides_key=1, - policyId=1, + organization_fides_key="1", + policyId="1", fides_key="test_policy", name="Test Policy", description="Test Policy", @@ -417,10 +383,9 @@ def test_valid_policy_rule(): @pytest.mark.unit def test_valid_policy(): Policy( - organization_fides_key=1, + organization_fides_key="1", fides_key="test_policy", name="Test Policy", - version="1.3", description="Test Policy", rules=[], ) @@ -430,8 +395,8 @@ def test_valid_policy(): @pytest.mark.unit def test_create_valid_system(): System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registry_id=1, fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -461,8 +426,8 @@ def test_invalid_country_identifier(country_code: str): """Validate some invalid country identifiers raise an error""" with pytest.raises(ValidationError): System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registry_id="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -486,8 +451,8 @@ def test_invalid_country_identifier(country_code: str): def test_valid_country_identifier(country_code: str): """Validates usage of alpha-3 codes per ISO 3166""" System( - organization_fides_key=1, - registryId=1, + organization_fides_key="1", + registry_id="1", fides_key="test_system", system_type="SYSTEM", name="Test System", @@ -509,12 +474,12 @@ def test_valid_country_identifier(country_code: str): @pytest.mark.unit def test_fides_key_validate_bad_key(): with pytest.raises(FidesValidationError): - FidesKey.validate("hi!") + validate_fides_key("hi!") @pytest.mark.unit def test_fides_key_validate_good_key(): - FidesKey.validate("hello_test_file.txt") + validate_fides_key("hello_test_file.txt") @pytest.mark.unit @@ -712,9 +677,10 @@ def test_data_categories_at_object_level(self): ), fields=[DatasetField(name="nested_field")], ) - assert "Object field 'test_field' cannot have specified data_categories" in str( - exc - ) + assert ( + "Object field 'test_field' cannot have specified data_categories" + in str(exc) + ) def test_object_field_conflicting_types(self): with pytest.raises(ValidationError) as exc: @@ -732,10 +698,10 @@ def test_object_field_conflicting_types(self): ), fields=[DatasetField(name="nested_field")], ) - assert ( - "The data type 'string' on field 'test_field' is not compatible with specified sub-fields." - in str(exc) - ) + assert ( + "The data type 'string' on field 'test_field' is not compatible with specified sub-fields." + in str(exc) + ) def test_data_categories_on_nested_fields(self): DatasetField( @@ -755,13 +721,11 @@ def test_data_categories_on_nested_fields(self): class TestCollectionMeta: def test_invalid_collection_key(self): with pytest.raises(ValidationError): - CollectionMeta(after=[FidesCollectionKey("test_key")]) + CollectionMeta(after=["test_key"]) def test_collection_key_has_too_many_components(self): with pytest.raises(ValidationError): - CollectionMeta( - after=[FidesCollectionKey("test_dataset.test_collection.test_field")] - ) + CollectionMeta(after=["test_dataset.test_collection.test_field"]) def test_valid_collection_key(self): - CollectionMeta(after=[FidesCollectionKey("test_dataset.test_collection")]) + CollectionMeta(after=["test_dataset.test_collection"])