diff --git a/docs/fides/docs/language/resources/dataset.md b/docs/fides/docs/language/resources/dataset.md index 4fcbdc197e..359ffb09d4 100644 --- a/docs/fides/docs/language/resources/dataset.md +++ b/docs/fides/docs/language/resources/dataset.md @@ -1,6 +1,6 @@ # Dataset -A Dataset takes a database schema (tables and columns) and adds Fides privacy categorizations. This is a database-agnostic way to annotate privacy declarations. +A Dataset takes a database schema (tables and columns) and adds Fides privacy categorizations. This is a database-agnostic way to annotate privacy declarations. ``` organization @@ -11,23 +11,21 @@ A Dataset takes a database schema (tables and columns) and adds Fides privacy ca |-> fields ``` - * The schema is represented as a set of "collections" (tables) that contain "fields" (columns). -* At each level -- Dataset, collection, and field, you can assign one or more Data Categories and Data Qualifiers. The Categories and Qualifiers declared at each child level is additive, for example, if you declare a collection with category `user.derived`, and a field with category `user.provided.identifiable.name`, your dataset will contain both user-derived and user-provided name data. +* At each level -- Dataset, collection, and field, you can assign one or more Data Categories and Data Qualifiers. The Categories and Qualifiers declared at each child level is additive, for example, if you declare a collection with category `user.derived`, and a field with category `user.provided.identifiable.name`, your dataset will contain both user-derived and user-provided name data. -While you can create Dataset objects by hand, you typically use the `fidesctl generate-dataset` command to create rudimentary Dataset manifest files that are based on your real-world databases. After you run the command, which creates the schema components, you add your Data Categories and Data Qualifiers to the manifest. 
+While you can create Dataset objects by hand, you typically use the `fidesctl generate-dataset` command to create rudimentary Dataset manifest files that are based on your real-world databases. After you run the command, which creates the schema components, you add your Data Categories and Data Qualifiers to the manifest. -You use your Datasets by adding them to Systems. A System can contain any number of Datasets, and a Dataset can be added to any number of Systems. +You use your Datasets by adding them to Systems. A System can contain any number of Datasets, and a Dataset can be added to any number of Systems. Datasets cannot contain other Datasets. - ## Object Structure -**fides_key**     _string_ +**fides_key**     _constrained string_ -A string token of your own invention that uniquely identifies this Dataset. It's your responsibility to ensure that the value is unique across all of your Dataset objects. The value may only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). +A string token of your own invention that uniquely identifies this Dataset. It's your responsibility to ensure that the value is unique across all of your Dataset objects. The value may only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`). **name**     _string_ @@ -56,7 +54,7 @@ Arrays of Data Category and Data Qualifier resources, identified by `fides_key`, **collections**     [_object_]
-An array of objects that describe the Dataset's collections. +An array of objects that describe the Dataset's collections. **collections.name**     string
@@ -73,7 +71,7 @@ Arrays of Data Category and Data Qualifier resources, identified by `fides_key`, **collections.fields**     [_object_]
-An array of objects that describe the collection's fields. +An array of objects that describe the collection's fields. **collections.fields.name**     string
@@ -98,6 +96,7 @@ An optional array of objects that describe hierarchical/nested fields (typically ## Examples ### **Manifest File** + ```yaml dataset: - fides_key: demo_users_dataset @@ -138,6 +137,7 @@ dataset: ``` ### **API Payload** + ```json { "fides_key": "demo_users_dataset", diff --git a/docs/fides/docs/language/resources/organization.md b/docs/fides/docs/language/resources/organization.md index fb03c89923..318c3b287a 100644 --- a/docs/fides/docs/language/resources/organization.md +++ b/docs/fides/docs/language/resources/organization.md @@ -1,17 +1,16 @@ # Organization - An Organization represents all or part of an enterprise or company, and establishes the root of your resource hierarchy. This means that while you can have more than Organization resource, they can't refer to each other's sub-resources. For example, your "American Stores" Organization can't refer to the Policy objects that are defined by your "European Stores" Organization. -Unless you're creating multiple Organizations (which should be rare), you can ignore the Organization resource. While all of your other resources must refer to an Organization (through their `organization_fides_key` properties), Fides creates a default Organization that it uses for all resources that don't otherwise specify an Organization. +Unless you're creating multiple Organizations (which should be rare), you can ignore the Organization resource. While all of your other resources must refer to an Organization (through their `organization_fides_key` properties), Fides creates a default Organization that it uses for all resources that don't otherwise specify an Organization. -The fides key for the default Organization is `default_organization` +The fides key for the default Organization is `default_organization` ## Object Structure **fides_key**  _string_ -A string token of your own invention that uniquely identifies this Organization. 
It's your responsibility to ensure that the value is unique across all of your Organization objects. The value should only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). +A string token of your own invention that uniquely identifies this Organization. It's your responsibility to ensure that the value is unique across all of your Organization objects. The value can only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`). **name**  _string_ @@ -35,7 +34,8 @@ An array of contact information for an optional representative for the organizat ## Examples -### **Manifest File** +### **Manifest File** + ```yaml organization: fides_key: default_organization @@ -59,6 +59,7 @@ organization: ``` ### **API Payload** + ```json { "fides_key": "default_organization", diff --git a/docs/fides/docs/language/resources/policy.md b/docs/fides/docs/language/resources/policy.md index 145654cdec..b3f7c196a4 100644 --- a/docs/fides/docs/language/resources/policy.md +++ b/docs/fides/docs/language/resources/policy.md @@ -1,6 +1,7 @@ # Policy A Policy is your privacy policy as code, it lists a set of acceptable and non-acceptable rules and uses all 4 privacy attributes (`data_category`, `data_use`, `data_subject`, and `data_qualifier`). The purpose of the policy is to state what types of data are allowed for certain usages. + ``` organization |-> ** policy ** @@ -9,9 +10,9 @@ A Policy is your privacy policy as code, it lists a set of acceptable and non-ac ## Object Structure -**fides_key**     _string_ +**fides_key**     _constrained string_ -A string token of your own invention that uniquely identifies this Policy. It's your responsibility to ensure that the value is unique across all of your Policy objects. The value may only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). +A string token of your own invention that uniquely identifies this Policy. 
It's your responsibility to ensure that the value is unique across all of your Policy objects. The value may only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`). **name**     _string_ @@ -50,7 +51,6 @@ The matches criteria describes how you would like this rule to be evaluated. The The fides key of the [Organization](/fides/language/resources/organization/) to which this Policy belongs. - ## Examples ### **Manifest File** diff --git a/docs/fides/docs/language/resources/registry.md b/docs/fides/docs/language/resources/registry.md index f5385382a1..50881e08f6 100644 --- a/docs/fides/docs/language/resources/registry.md +++ b/docs/fides/docs/language/resources/registry.md @@ -1,7 +1,7 @@ # Registry +A Registry is a collection of System resources. You may add a System to a Registry by setting the System's `registry_id` field. -A Registry is a collection of System resources. You add a system to a Registry by setting the System's `registry_id` field. ``` organization |-> ** registry ** (optional) @@ -11,14 +11,13 @@ A Registry is a collection of System resources. You add a system to a Registry b * A System may belong to only one Registry. * All Registries are siblings: You cannot create a hierarchy of Registries. -* Collecting your systems into Registries is optional. - +* Collecting your systems into Registries is optional. ## Object Structure -**fides_key**     _string_ +**fides_key**     _constrained string_ -A string token of your own invention that uniquely identifies this Registry. It's your responsibility to ensure that the value is unique across all of your Registry objects. The value may only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). +A string token of your own invention that uniquely identifies this Registry. It's your responsibility to ensure that the value is unique across all of your Registry objects. The value may only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`).
**name**     _string_ @@ -32,10 +31,10 @@ A human-readable description of the Registry. The fides key of the [Organization](/fides/language/resources/organization/) to which this Registry belongs. - ## Examples ### **Manifest File** + ```yaml registry: - fides_key: user_systems_registry @@ -44,6 +43,7 @@ registry: ``` ### **API Payload** + ```json { "fides_key": "user_systems_registry", diff --git a/docs/fides/docs/language/resources/system.md b/docs/fides/docs/language/resources/system.md index 6703fb168d..d33746de4e 100644 --- a/docs/fides/docs/language/resources/system.md +++ b/docs/fides/docs/language/resources/system.md @@ -9,12 +9,11 @@ A System is a model for describing anything that processes data for your organiz |-> privacy declarations ``` - ## Object Structure -**fides_key**     _string_ +**fides_key**     _constrained string_ -A string token of your own invention that uniquely identifies this System. It's your responsibility to ensure that the value is unique across all of your System objects. The value may only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). +A string token of your own invention that uniquely identifies this System. It's your responsibility to ensure that the value is unique across all of your System objects. The value may only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`). **name**     _string_ @@ -36,7 +35,6 @@ The array of declarations describing the types of data in your system. This is a The fides key of the [Organization](/fides/language/resources/organization/) to which this System belongs.
- ## Examples ### **Manifest File** @@ -67,8 +65,6 @@ system: **Demo manifest file:** `/fides/fidesctl/demo_resources/demo_system.yml` - - ### **API** ```json diff --git a/docs/fides/docs/language/taxonomy/data_categories.md b/docs/fides/docs/language/taxonomy/data_categories.md index 20238ba3b2..a532ab0006 100644 --- a/docs/fides/docs/language/taxonomy/data_categories.md +++ b/docs/fides/docs/language/taxonomy/data_categories.md @@ -6,18 +6,17 @@ Data Categories are hierarchical labels used to describe the type of data proces ## Object Structure -**fides_key**_string_ +**fides_key**     _constrained string_ A string token that uniquely identifies this Data Category. The value is a dot-separated concatenation of the `fides_key` values of the resource's ancestors plus a final element for this resource: `grandparent.parent.this_data_category` -The final element (`this_data_category`) may only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). The dot character is reserved as a separator. - +The final element (`this_data_category`) may only contain alphanumeric characters, hyphens and underscores (`[A-Za-z0-9_-]`). The dot character is reserved as a separator. **name**_string_ -A UI-friendly label for the Data Category. +A UI-friendly label for the Data Category. **description**_string_ @@ -31,10 +30,9 @@ The fides key of the the Data Category's parent. The fides key of the organization to which this Data Category belongs. - !!! Note "Extensibility and Interopability" - Data Categories in Fides are designed to support common privacy regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. - + Data Categories in Fides are designed to support common privacy regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. + You can extend the taxonomy to support your system needs. If you do this, we recommend extending from the existing categories to ensure interopability inside and outside your organization.
If you have suggestions for core categories that should ship with the taxonomy, please submit your requests [here](https://github.com/ethyca/fides/issues) @@ -43,7 +41,6 @@ The fides key of the organization to which this Data Category belongs. There are three top-level categories: - | Label | Parent Key | Description | | --- | --- | --- | | `account` | `-` | Data related to an account on the system. | @@ -60,20 +57,20 @@ Below is a reference for all subcategories of `account`, `system` and `user` to | Label | Parent Key | Description | | --- | --- | --- | -| `contact` | `account` | Contact data related to a system account. | -| `city` | `account.contact` | Account's city level address data. | -| `country` | `account.contact` | Account's country level address data. | -| `email` | `account.contact` | Account's email address. | -| `phone_number` | `account.contact` | Account's phone number. | -| `postal_code` | `account.contact` | Account's postal code. | -| `state` | `account.contact` | Account's state level address data. | -| `street` | `account.contact` | Account's street level address. | +| `contact` | `account` | Contact data related to a system account. | +| `city` | `account.contact` | Account's city level address data. | +| `country` | `account.contact` | Account's country level address data. | +| `email` | `account.contact` | Account's email address. | +| `phone_number` | `account.contact` | Account's phone number. | +| `postal_code` | `account.contact` | Account's postal code. | +| `state` | `account.contact` | Account's state level address data. | +| `street` | `account.contact` | Account's street level address. | ### Account Payment Data | Label | Parent Key | Description | | --- | --- | --- | -| `payment` | `account` | Payment data related to system account. | +| `payment` | `account` | Payment data related to system account. | | `financial_account_number` | `account.payment` | Payment data related to system account. 
| ## System Data Categories @@ -81,15 +78,16 @@ Below is a reference for all subcategories of `account`, `system` and `user` to | Label | Parent Key | Description | | --- | --- | --- | | `authentication` | `system` | Data used to manage access to the system. | -| `operations` | `system` | Data used for system operations. | +| `operations` | `system` | Data used for system operations. | ## User Data Categories -The "User" data category has two important subcategories for `derived` and `provided` data. +The "User" data category has two important subcategories for `derived` and `provided` data. In turn, `derived` and `provided` both have subcategories for `identifiable` and `nonidentifiable` data, to make it clear what data is considered identifiable in your systems. ### User Derived Data + Data derived from user provided data or as a result of user actions in the system. | Label | Parent Key | Description | @@ -123,6 +121,7 @@ Data derived from user provided data or as a result of user actions in the syste | `nonsensor` | `user.derived.nonidentifiable` |Non-user identifiable measurement data derived from sensors and monitoring systems. | ### User Provided Data + Data provided or created directly by a user of the system. | Label | Parent Key | Description | @@ -159,4 +158,4 @@ Data provided or created directly by a user of the system. | `drivers_license_number` | `user.provided.identifiable.government_id`|State issued driving identification number. | | `national_identification_number` | `user.provided.identifiable.government_id`|State issued personal identification number. | | `passport_number` | `user.provided.identifiable.government_id`|State issued passport data. | -| `nonidentifiable` | `user.provided` |Data provided or created directly by a user that is not identifiable. | \ No newline at end of file +| `nonidentifiable` | `user.provided` |Data provided or created directly by a user that is not identifiable. 
| diff --git a/docs/fides/docs/language/taxonomy/data_subjects.md b/docs/fides/docs/language/taxonomy/data_subjects.md index d0436fa985..d4ee611459 100644 --- a/docs/fides/docs/language/taxonomy/data_subjects.md +++ b/docs/fides/docs/language/taxonomy/data_subjects.md @@ -4,13 +4,13 @@ A Data Subject is a label that describes a segment of individuals whose data yo ## Object Structure -**fides_key**_string_ +**fides_key**     _constrained string_ -A string token of your own invention that uniquely identifies this Data Subject. It's your responsibility to ensure that the value is unique across all of your Data Subject objects. The value should only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). +A string token of your own invention that uniquely identifies this Data Subject. It's your responsibility to ensure that the value is unique across all of your Data Subject objects. The value can only contain alphanumeric characters, hyphens, periods and underscores (`[A-Za-z0-9_.-]`). **name**_string_ -A UI-friendly label for the Data Subject. +A UI-friendly label for the Data Subject. **description**_string_ @@ -21,8 +21,8 @@ A human-readable description of the Data Subject. The fides key of the organization to which this Data Subject belongs. !!! Note "Extensibility and Interopability" - Data Subjects in Fides are designed to support common privacy regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. - + Data Subjects in Fides are designed to support common privacy regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. + You can extend the taxonomy to support your organization's needs. If you do this, we recommend extending from the existing categories to ensure interopability inside and outside your organization. 
If you have suggestions for core categories that should ship with the taxonomy, please submit your requests [here](https://github.com/ethyca/fides/issues) diff --git a/docs/fides/docs/language/taxonomy/data_uses.md b/docs/fides/docs/language/taxonomy/data_uses.md index 169880aee3..92a4d0f184 100644 --- a/docs/fides/docs/language/taxonomy/data_uses.md +++ b/docs/fides/docs/language/taxonomy/data_uses.md @@ -2,22 +2,21 @@ A Data Use is a label that denotes the way data is used in your system: "Advertising, Marketing or Promotion", "First Party Advertising", and "Sharing for Legal Obligation", as examples. -Data Use objects form a hierarchy: A Data Use can contain any number of children, but a given Data Use may only have one parent. You assign a child Data Use to a parent by setting the child's `parent_key` property. For example, the `third_party_sharing.personalized_advertising` Data Use type is data used for personalized advertising when shared with third parties. +Data Use objects form a hierarchy: A Data Use can contain any number of children, but a given Data Use may only have one parent. You assign a child Data Use to a parent by setting the child's `parent_key` property. For example, the `third_party_sharing.personalized_advertising` Data Use type is data used for personalized advertising when shared with third parties. ## Object Structure -**fides_key**_string_ +**fides_key**     _constrained string_ A string token that uniquely identifies this Data Use. The value is a dot-separated concatenation of the `fides_key` values of the resource's ancestors plus a final element for this resource: `grandparent.parent.this_data_use` -The final element (`this_data_use`) may only contain alphanumeric characters and underbars (`[A-Za-z0-9_]`). The dot character is reserved as a separator. - +The final element (`this_data_use`) may only contain alphanumeric characters, hyphens and underscores (`[A-Za-z0-9_-]`). The dot character is reserved as a separator.
**name**_string_ -A UI-friendly label for the Data Use. +A UI-friendly label for the Data Use. **description**_string_ @@ -31,15 +30,13 @@ The fides key of the the Data Use's parent. The fides key of the organization to which this Data Use belongs. - !!! Note "Extensibility and Interopability" - Data Uses in Fides are designed to support common privacy regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. - + Data Uses in Fides are designed to support common privacy regulations and standards out of the box, these include GDPR, CCPA, LGPD and ISO 19944. + You can extend the taxonomy to support your organization's needs. If you do this, we recommend extending from the existing categories to ensure interopability inside and outside your organization. If you have suggestions for core categories that should ship with the taxonomy, please submit your requests [here](https://github.com/ethyca/fides/issues) - ## Top Level Data Uses There are seven top-level Data Use classes: @@ -54,7 +51,6 @@ There are seven top-level Data Use classes: |`collect` |`-` |Collecting and storing data in order to use it for another purpose such as data training for ML. | |`train_ai_system` |`-` |Training an AI system. Please note when this data use is specified, the method and degree to which a user may be directly identified in the resulting AI system should be appended.| - For each top level classification there are multiple subclasses that provide richer context. Below is a reference for all subclasses of `account`, `system` and `user` to assist with describing all data across systems. @@ -74,14 +70,12 @@ Below is a reference for all subclasses of `account`, `system` and `user` to ass | --- | --- | --- | |`system` | `improve` |The source system, product, service or application being improved. 
| - ### Personalize Data Uses | Label | Parent Key | Description | | --- | --- | --- | |`system` | `personalize` | The source system, product, service or application being personalized. | - ### Advertising Data Uses | Label | Parent Key | Description | @@ -92,7 +86,6 @@ Below is a reference for all subclasses of `account`, `system` and `user` to ass |`third_party` | `advertising` | The promotion of products or services targeting users based on processing of specific categories of data acquired from third party sources. | |`personalized` | `advertising.third_party` | The targeting and changing of promotional content based on processing of specific categories of user data acquired from third party sources. | - ### Third Party Sharing Data Uses | Label | Parent Key | Description | @@ -104,7 +97,7 @@ Below is a reference for all subclasses of `account`, `system` and `user` to ass ### Collection & AI Training Data Uses -In the case of `collection` and `train_ai_system`, you will see these have no subclasses at present however define very specific data use cases that should be captured in data processes if they occur. +In the case of `collection` and `train_ai_system`, you will see these have no subclasses at present however define very specific data use cases that should be captured in data processes if they occur. | Label | Parent Key | Description | | --- | --- | --- | diff --git a/fidesctl/src/fideslang/validation.py b/fidesctl/src/fideslang/validation.py index a627b88d1f..974354a192 100644 --- a/fidesctl/src/fideslang/validation.py +++ b/fidesctl/src/fideslang/validation.py @@ -14,16 +14,16 @@ class FidesValidationError(Exception): class FidesKey(ConstrainedStr): """ - A FidesKey should only contain alphanumeric characters, '.' or '_' + A FidesKey type that creates a custom constrained string. 
""" - regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.]+$") + regex: Pattern[str] = re.compile(r"^[a-zA-Z0-9_.-]+$") @classmethod # This overrides the default method to throw the custom FidesValidationError def validate(cls, value: str) -> str: if not cls.regex.match(value): raise FidesValidationError( - "FidesKey must only contain alphanumeric characters, '.' or '_'." + "FidesKey must only contain alphanumeric characters, '.', '_' or '-'." ) return value diff --git a/fidesctl/tests/lang/test_models.py b/fidesctl/tests/lang/test_validation.py similarity index 89% rename from fidesctl/tests/lang/test_models.py rename to fidesctl/tests/lang/test_validation.py index e984df88bd..2d9dc8d149 100644 --- a/fidesctl/tests/lang/test_models.py +++ b/fidesctl/tests/lang/test_validation.py @@ -114,31 +114,18 @@ def test_circular_dependency_data_use(): @pytest.mark.unit -def test_fides_model_valid(): - fides_key = FidesModel(fides_key="foo_bar", name="Foo Bar") +@pytest.mark.parametrize("fides_key", ["foo_bar", "foo-bar", "foo.bar", "foo_bar_8"]) +def test_fides_model_valid(fides_key: str): + fides_key = FidesModel(fides_key=fides_key, name="Foo Bar") assert fides_key @pytest.mark.unit -def test_fides_model_fides_key_invalid(): +@pytest.mark.parametrize("fides_key", ["foo/bar", "foo%bar", "foo^bar"]) +def test_fides_model_fides_key_invalid(fides_key): "Check for a bunch of different possible bad characters here." 
with pytest.raises(FidesValidationError): - FidesModel(fides_key="foo-bar") - - with pytest.raises(FidesValidationError): - FidesModel(fides_key="foo/bar") - - with pytest.raises(FidesValidationError): - FidesModel(fides_key="foo=bar") - - with pytest.raises(FidesValidationError): - FidesModel(fides_key="foo^bar") - - with pytest.raises(FidesValidationError): - FidesModel(fides_key="_foo^bar") - - with pytest.raises(FidesValidationError): - FidesModel(fides_key="") + FidesModel(fides_key=fides_key) @pytest.mark.unit @@ -150,7 +137,7 @@ def test_valid_privacy_rule(): @pytest.mark.unit def test_invalid_fides_key_privacy_rule(): with pytest.raises(FidesValidationError): - PrivacyRule(matches="ANY", values=["foo-bar"]) + PrivacyRule(matches="ANY", values=["foo^bar"]) assert True