diff --git a/.github/workflows/documentation.yml b/.github/workflows/documentation.yml
index e6044badb1b41..e697942b426cd 100644
--- a/.github/workflows/documentation.yml
+++ b/.github/workflows/documentation.yml
@@ -56,9 +56,10 @@ jobs:
./gradlew --info docs-website:build
- name: Deploy
- if: github.event_name == 'push' && github.repository == 'datahub-project/datahub'
+ if: github.event_name == 'push'
uses: peaceiris/actions-gh-pages@v3
with:
- github_token: ${{ secrets.GITHUB_TOKEN }}
+ # github_token: ${{ secrets.GITHUB_TOKEN }}
+ personal_token: ${{ secrets.PERSONAL_TOKEN }}
publish_dir: ./docs-website/build
- cname: datahubproject.io
+ # cname: datahubproject.io
diff --git a/README.md b/README.md
index 1089c4dbc055c..5835d36964d80 100644
--- a/README.md
+++ b/README.md
@@ -80,11 +80,15 @@ Please follow the [DataHub Quickstart Guide](https://datahubproject.io/docs/quic
If you're looking to build & modify datahub please take a look at our [Development Guide](https://datahubproject.io/docs/developers).
+<<<<<<< HEAD
+[![DataHub Demo GIF](docs/imgs/entity.png)](https://demo.datahubproject.io/)
+=======
(urn: String!, input: EntityUpdateInput!)` GraphQL Query.
+For example, to update a Dashboard entity, you can issue the following GraphQL mutation:
+
+```json
+mutation updateDashboard {
+ updateDashboard(
+ urn: "urn:li:dashboard:(looker,baz)",
+ input: {
+ editableProperties: {
+      description: "My new description"
+ }
+ }
+ ) {
+ urn
+ }
+}
+```
+
+For more information, please refer to following links.
+
+- [Adding Tags](/docs/api/tutorials/adding-tags.md)
+- [Adding Glossary Terms](/docs/api/tutorials/adding-terms.md)
+- [Adding Domain](/docs/api/tutorials/adding-domain.md)
+- [Adding Owners](/docs/api/tutorials/adding-ownerships.md)
+- [Removing Tags](/docs/api/tutorials/removing-tags.md)
+- [Removing Glossary Terms](/docs/api/tutorials/removing-terms.md)
+- [Removing Domain](/docs/api/tutorials/removing-domains.md)
+- [Removing Owners](/docs/api/tutorials/removing-owners.md)
+- [Updating Deprecation](/docs/api/tutorials/update-deprecation.md)
+- [Editing Description (i.e. Documentation) on Datasets](/docs/api/tutorials/adding-dataset-description.md)
+- [Editing Description (i.e. Documentation) on Columns](/docs/api/tutorials/adding-column-description.md)
+- [Soft Deleting](/docs/api/tutorials/deleting-entities-by-urn.md)
+
+Please refer to [Datahub API Comparison](/docs/api/datahub-apis.md#datahub-api-comparison) to navigate to the use-case oriented guide.
+
+## Handling Errors
+
+In GraphQL, requests that have errors do not always result in a non-200 HTTP response body. Instead, errors will be
+present in the response body inside a top-level `errors` field.
+
+This enables situations in which the client is able to deal gracefully with partial data returned by the application server.
+To verify that no error has returned after making a GraphQL request, make sure you check _both_ the `data` and `errors` fields that are returned.
+
+To catch a GraphQL error, simply check the `errors` field inside the GraphQL response. It will contain a message, a path, and a set of extensions
+which contain a standard error code.
+
+```json
+{
+ "errors": [
+ {
+ "message": "Failed to change ownership for resource urn:li:dataFlow:(airflow,dag_abc,PROD). Expected a corp user urn.",
+ "locations": [
+ {
+ "line": 1,
+ "column": 22
+ }
+ ],
+ "path": ["addOwners"],
+ "extensions": {
+ "code": 400,
+ "type": "BAD_REQUEST",
+ "classification": "DataFetchingException"
+ }
+ }
+ ]
+}
+```
+
+With the following error codes officially supported:
+
+| Code | Type | Description |
+| ---- | ------------ | --------------------------------------------------------------------------------------------- |
+| 400 | BAD_REQUEST | The query or mutation was malformed. |
+| 403 | UNAUTHORIZED | The current actor is not authorized to perform the requested action. |
+| 404 | NOT_FOUND | The resource is not found. |
+| 500 | SERVER_ERROR | An internal error has occurred. Check your server logs or contact your DataHub administrator. |
+
+> Visit our [Slack channel](https://slack.datahubproject.io) to ask questions, tell us what we can do better, & make requests for what you'd like to see in the future. Or just
+> stop by to say 'Hi'.
+=======
:::note
Mutations which change Entity metadata are subject to [DataHub Access Policies](../../authorization/policies.md).
@@ -154,3 +262,4 @@ With the following error codes officially supported:
| 404 | NOT_FOUND | The resource is not found. |
| 500 | SERVER_ERROR | An internal error has occurred. Check your server logs or contact your DataHub administrator. |
+>>>>>>> upstream/master
diff --git a/docs/api/graphql/how-to-set-up-graphql.md b/docs/api/graphql/how-to-set-up-graphql.md
index 2be2f935b12b1..b7e106f8b9cc2 100644
--- a/docs/api/graphql/how-to-set-up-graphql.md
+++ b/docs/api/graphql/how-to-set-up-graphql.md
@@ -62,6 +62,11 @@ Postman is a popular API client that provides a graphical user interface for sen
Within Postman, you can create a `POST` request and set the request URL to the `/api/graphql` endpoint.
In the request body, select the `GraphQL` option and enter your GraphQL query in the request body.
+<<<<<<< HEAD
+![postman-graphql](../../imgs/apis/postman-graphql.png)
+
+Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql/) in the Postman documentation for more information.
+=======
@@ -69,6 +74,7 @@ In the request body, select the `GraphQL` option and enter your GraphQL query in
Please refer to [Querying with GraphQL](https://learning.postman.com/docs/sending-requests/graphql/graphql-overview/) in the Postman documentation for more information.
+>>>>>>> upstream/master
### Authentication + Authorization
diff --git a/docs/api/graphql/overview.md b/docs/api/graphql/overview.md
index 3077d83416dff..2e43c9fe017a8 100644
--- a/docs/api/graphql/overview.md
+++ b/docs/api/graphql/overview.md
@@ -3,8 +3,14 @@
DataHub provides a rich [`graphql`](https://graphql.org/) API for programmatically interacting with the Entities & Relationships comprising your organization's Metadata Graph.
## Getting Started
+To begin using the DataHub GraphQL API, please consult the [Getting Started](/docs/api/graphql/getting-started.md).
+For detailed guidance on using GraphQL for specific use cases, please refer to [Datahub API Comparison](/docs/api/datahub-apis.md#datahub-api-comparison).
+
+<<<<<<< HEAD
+=======
To begin using the DataHub `graphql` API, please consult the [Getting Started](/docs/api/graphql/getting-started.md).
+>>>>>>> upstream/master
For detailed guidance on using `graphql` for specific use cases, please refer to [Datahub API Comparison](/docs/api/datahub-apis.md#datahub-api-comparison).
@@ -38,3 +44,9 @@ that may be performed using the API.
- Available Operations: [Queries](/graphql/queries.md) (Reads) & [Mutations](/graphql/mutations.md) (Writes)
- Schema Types: [Objects](/graphql/objects.md), [Input Objects](/graphql/inputObjects.md), [Interfaces](/graphql/interfaces.md), [Unions](/graphql/unions.md), [Enums](/graphql/enums.md), [Scalars](/graphql/scalars.md)
+<<<<<<< HEAD
+
+> Visit our [Slack channel](https://slack.datahubproject.io) to ask questions, tell us what we can do better, & make requests for what you'd like to see in the future. Or just
+stop by to say 'Hi'.
+=======
+>>>>>>> upstream/master
diff --git a/docs/api/graphql/token-management.md b/docs/api/graphql/token-management.md
index 4a4dd90bb55be..2e5d1e85e2672 100644
--- a/docs/api/graphql/token-management.md
+++ b/docs/api/graphql/token-management.md
@@ -131,3 +131,9 @@ curl --location --request POST 'http://localhost:8080/api/graphql' \
```
This endpoint will return a boolean detailing whether the operation was successful. In case of failure, an error message will appear explaining what went wrong.
+<<<<<<< HEAD
+
+> Visit our [Slack channel](https://slack.datahubproject.io) to ask questions, tell us what we can do better, & make requests for what you'd like to see in the future. Or just
+stop by to say 'Hi'.
+=======
+>>>>>>> upstream/master
diff --git a/docs/api/tutorials/adding-column-description.md b/docs/api/tutorials/adding-column-description.md
new file mode 100644
index 0000000000000..df9581ac39e5b
--- /dev/null
+++ b/docs/api/tutorials/adding-column-description.md
@@ -0,0 +1,204 @@
+# Adding Description on Columns
+
+## Why Would You Add Description on Columns?
+
+Adding column descriptions (documentation) to a dataset can provide crucial context for understanding the data and its variables. This can aid in data exploration, cleaning, and analysis, as well as ensure that others can understand the data if it is shared or used in collaboration. Additionally, column descriptions can help prevent errors and misunderstandings by clearly defining the meaning and units of measurement for each variable.
+
+### Goal Of This Guide
+
+This guide will show you how to add a description to the `user_name` column of a dataset `fct_users_deleted`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding a description, you need to ensure the targeted dataset is already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from sample ingestion.
+:::
+
+In this example, we will add a description to the `user_name` column of a dataset `fct_users_deleted`.
+
+## Add Description With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+mutation updateDescription {
+ updateDescription(
+ input: {
+ description: "Name of the user who was deleted. This description is updated via GrpahQL.",
+ resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)",
+ subResource: "user_name",
+ subResourceType:DATASET_FIELD
+ }
+ )
+}
+```
+
+Note that you can use general markdown in `description`. For example, you can do the following.
+
+```json
+mutation updateDescription {
+ updateDescription(
+ input: {
+ description: """
+ ### User Name
+ The `user_name` column is a primary key column that contains the name of the user who was deleted.
+ """,
+ resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)",
+ subResource: "user_name",
+ subResourceType:DATASET_FIELD
+ }
+ )
+}
+```
+
+`updateDescription` currently only supports Dataset Schema Fields and Containers.
+For more information about the `updateDescription` mutation, please refer to [updateDescription](https://datahubproject.io/docs/graphql/mutations/#updatedescription).
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "updateDescription": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation updateDescription { updateDescription ( input: { description: \"Name of the user who was deleted. This description is updated via GrpahQL.\", resourceUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)\", subResource: \"user_name\", subResourceType:DATASET_FIELD }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "updateDescription": true }, "extensions": {} }
+```
+
+## Add Description With Python SDK
+
+The following code adds a description to the `user_name` column of a dataset `fct_users_deleted`.
+
+```python
+import logging
+import time
+
+from datahub.emitter.mce_builder import make_dataset_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+
+# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough)
+from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import (
+ AuditStampClass,
+ InstitutionalMemoryClass,
+ EditableSchemaMetadataClass,
+ EditableSchemaFieldInfoClass,
+)
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+def get_simple_field_path_from_v2_field_path(field_path: str) -> str:
+ """A helper function to extract simple . path notation from the v2 field path"""
+ if not field_path.startswith("[version=2.0]"):
+ # not a v2, we assume this is a simple path
+ return field_path
+ # this is a v2 field path
+ tokens = [
+ t for t in field_path.split(".") if not (t.startswith("[") or t.endswith("]"))
+ ]
+
+ return ".".join(tokens)
+
+# Inputs -> owner, ownership_type, dataset
+documentation_to_add = "Name of the user who was deleted. This description is updated via PythonSDK."
+dataset_urn = make_dataset_urn(platform="hive", name="fct_users_deleted", env="PROD")
+column = "user_name"
+field_info_to_set = EditableSchemaFieldInfoClass(
+ fieldPath=column, description=documentation_to_add
+)
+
+
+# Some helpful variables to fill out objects later
+now = int(time.time() * 1000) # milliseconds since epoch
+current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion")
+
+
+# First we get the current owners
+gms_endpoint = "http://localhost:8080"
+graph = DataHubGraph(config=DatahubClientConfig(server=gms_endpoint))
+
+current_editable_schema_metadata = graph.get_aspect(
+ entity_urn=dataset_urn,
+ aspect_type=EditableSchemaMetadataClass,
+)
+
+
+need_write = False
+
+if current_editable_schema_metadata:
+ for fieldInfo in current_editable_schema_metadata.editableSchemaFieldInfo:
+ if get_simple_field_path_from_v2_field_path(fieldInfo.fieldPath) == column:
+ # we have some editable schema metadata for this field
+ field_match = True
+ if documentation_to_add != fieldInfo.description:
+ fieldInfo.description = documentation_to_add
+ need_write = True
+else:
+ # create a brand new editable dataset properties aspect
+ current_editable_schema_metadata = EditableSchemaMetadataClass(
+ editableSchemaFieldInfo=[field_info_to_set],
+ created=current_timestamp,
+ )
+ need_write = True
+
+if need_write:
+ event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn,
+ aspect=current_editable_schema_metadata,
+ )
+ graph.emit(event)
+ log.info(f"Documentation added to dataset {dataset_urn}")
+
+else:
+ log.info("Documentation already exists and is identical, omitting write")
+
+
+current_institutional_memory = graph.get_aspect(
+ entity_urn=dataset_urn, aspect_type=InstitutionalMemoryClass
+)
+
+need_write = False
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see column description is added to `user_name` column of `fct_users_deleted`.
+
+![column-description-added](../../imgs/apis/tutorials/column-description-added.png)
diff --git a/docs/api/tutorials/adding-dataset-description.md b/docs/api/tutorials/adding-dataset-description.md
new file mode 100644
index 0000000000000..87c13608d7d8c
--- /dev/null
+++ b/docs/api/tutorials/adding-dataset-description.md
@@ -0,0 +1,140 @@
+# Adding Description on Datasets
+
+## Why Would You Add Description on Dataset?
+
+Adding a description and related link to a dataset can provide important information about the data, such as its source, collection methods, and potential uses. This can help others understand the context of the data and how it may be relevant to their own work or research. Including a related link can also provide access to additional resources or related datasets, further enriching the information available to users.
+
+### Goal Of This Guide
+
+This guide will show you how to add a description and a link to dataset `fct_users_deleted`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding a description, you need to ensure the targeted dataset is already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from sample ingestion.
+:::
+
+In this example, we will add a description and a link to the dataset `fct_users_deleted`.
+
+## Add Description With GraphQL (Not Supported)
+
+> 🚫 Adding Description on Dataset via GraphQL is currently not supported.
+> Please check out [API feature comparison table](/docs/api/datahub-apis.md#datahub-api-comparison) for more information.
+
+## Add Description With Python SDK
+
+The following code adds a description and a link to a dataset named `fct_users_deleted`.
+For more information, please refer to [dataset_add_documentation.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/dataset_add_documentation.py)
+
+```python
+import logging
+import time
+
+from datahub.emitter.mce_builder import make_dataset_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+
+# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough)
+from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import (
+ AuditStampClass,
+ EditableDatasetPropertiesClass,
+ InstitutionalMemoryClass,
+ InstitutionalMemoryMetadataClass,
+)
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+# Inputs -> owner, ownership_type, dataset
+documentation_to_add = "This table contains information on users deleted on a single day. This description is updated via PythonSDK."
+link_to_add = "https://en.wikipedia.org/wiki/Fact_table"
+link_description = "This is the definition of what fact table means"
+dataset_urn = make_dataset_urn(platform="hive", name="fct_users_deleted", env="PROD")
+
+# Some helpful variables to fill out objects later
+now = int(time.time() * 1000) # milliseconds since epoch
+current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion")
+institutional_memory_element = InstitutionalMemoryMetadataClass(
+ url=link_to_add,
+ description=link_description,
+ createStamp=current_timestamp,
+)
+
+
+# First we get the current owners
+gms_endpoint = "http://localhost:8080"
+graph = DataHubGraph(config=DatahubClientConfig(server=gms_endpoint))
+
+current_editable_properties = graph.get_aspect(
+ entity_urn=dataset_urn, aspect_type=EditableDatasetPropertiesClass
+)
+
+need_write = False
+if current_editable_properties:
+ if documentation_to_add != current_editable_properties.description:
+ current_editable_properties.description = documentation_to_add
+ need_write = True
+else:
+ # create a brand new editable dataset properties aspect
+ current_editable_properties = EditableDatasetPropertiesClass(
+ created=current_timestamp, description=documentation_to_add
+ )
+ need_write = True
+
+if need_write:
+ event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn,
+ aspect=current_editable_properties,
+ )
+ graph.emit(event)
+ log.info(f"Documentation added to dataset {dataset_urn}")
+
+else:
+ log.info("Documentation already exists and is identical, omitting write")
+
+
+current_institutional_memory = graph.get_aspect(
+ entity_urn=dataset_urn, aspect_type=InstitutionalMemoryClass
+)
+
+need_write = False
+
+if current_institutional_memory:
+ if link_to_add not in [x.url for x in current_institutional_memory.elements]:
+ current_institutional_memory.elements.append(institutional_memory_element)
+ need_write = True
+else:
+ # create a brand new institutional memory aspect
+ current_institutional_memory = InstitutionalMemoryClass(
+ elements=[institutional_memory_element]
+ )
+ need_write = True
+
+if need_write:
+ event = MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn,
+ aspect=current_institutional_memory,
+ )
+ graph.emit(event)
+ log.info(f"Link {link_to_add} added to dataset {dataset_urn}")
+
+else:
+ log.info(f"Link {link_to_add} already exists and is identical, omitting write")
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see the description is added to `fct_users_deleted`.
+
+![dataset-description-added](../../imgs/apis/tutorials/dataset-description-added.png)
diff --git a/docs/api/tutorials/adding-domain.md b/docs/api/tutorials/adding-domain.md
new file mode 100644
index 0000000000000..e4b341d202434
--- /dev/null
+++ b/docs/api/tutorials/adding-domain.md
@@ -0,0 +1,83 @@
+# Adding a Dataset to a Domain
+
+## Why Would You Add Domains?
+
+Domains are curated, top-level folders or categories where related assets can be explicitly grouped. Management of Domains can be centralized, or distributed out to Domain owners. Currently, an asset can belong to only one Domain at a time.
+For more information about domains, refer to [About DataHub Domains](/docs/domains.md).
+
+### Goal Of This Guide
+
+This guide will show you how to add a dataset named `fct_users_created` to a domain named `Marketing`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding domains, you need to ensure the targeted dataset and the domain are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+For more information on how to create domains, please refer to [Create Domain](/docs/api/tutorials/creating-domain.md)
+:::
+
+## Add Domains With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+mutation setDomain {
+ setDomain(domainUrn: "urn:li:domain:marketing", entityUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)")
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "setDomain": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation setDomain { setDomain(entityUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", domainUrn: "urn:li:domain:marketing")) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "setDomain": true }, "extensions": {} }
+```
+
+## Add Domains With Python SDK
+
+The following code adds a dataset `fct_users_created` to a domain named `Marketing`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see the `Marketing` domain has been added to `fct_users_created`.
+
+![tag-added](../../imgs/apis/tutorials/tag-added.png)
diff --git a/docs/api/tutorials/adding-lineage.md b/docs/api/tutorials/adding-lineage.md
new file mode 100644
index 0000000000000..ab62d3247ad42
--- /dev/null
+++ b/docs/api/tutorials/adding-lineage.md
@@ -0,0 +1,134 @@
+# Adding Lineage
+
+## Why Would You Add Lineage?
+
+Lineage is used to capture data dependencies within an organization. It allows you to track the inputs from which a data asset is derived, along with the data assets that depend on it downstream.
+For more information about lineage, refer to [About DataHub Lineage](/docs/lineage/lineage-feature-guide.md).
+
+### Goal Of This Guide
+
+This guide will show you how to add lineage between two hive datasets named `fct_users_deleted` and `logging_events`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding lineage, you need to ensure the targeted dataset is already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from sample ingestion.
+:::
+
+## Add Lineage With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+mutation updateLineage {
+ updateLineage(
+ input: {
+ edgesToAdd: [
+ {
+ downstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)"
+ upstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)"
+ }
+ ]
+ edgesToRemove: []
+ }
+ )
+}
+```
+
+Note that you can create a list of edges. For example, if you want to assign multiple upstream entities to a downstream entity, you can do the following.
+
+```json
+mutation updateLineage {
+ updateLineage(
+ input: {
+ edgesToAdd: [
+ {
+ downstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)"
+ upstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)"
+ }
+ {
+ downstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)"
+ upstreamUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"
+ }
+ ]
+ edgesToRemove: []
+ }
+ )
+}
+
+```
+
+For more information about the `updateLineage` mutation, please refer to [updateLineage](https://datahubproject.io/docs/graphql/mutations/#updatelineage).
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "updateLineage": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' --data-raw '{ "query": "mutation updateLineage { updateLineage( input:{ edgesToAdd : { downstreamUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)\", upstreamUrn : \"urn:li:dataset:(urn:li:dataPlatform:hive,logging_events,PROD)\"}, edgesToRemove :{downstreamUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)\",upstreamUrn : \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)\" } })}", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "updateLineage": true }, "extensions": {} }
+```
+
+## Add Lineage With Python SDK
+
+You can refer to the related code in [lineage_emitter_rest.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/lineage_emitter_rest.py).
+
+```python
+import datahub.emitter.mce_builder as builder
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+
+# Construct a lineage object.
+lineage_mce = builder.make_lineage_mce(
+ [
+ builder.make_dataset_urn("hive", "fct_users_deleted"), # Upstream
+ ],
+ builder.make_dataset_urn("hive", "logging_events"), # Downstream
+)
+
+# Create an emitter to the GMS REST API.
+emitter = DatahubRestEmitter("http://localhost:8080")
+
+# Emit metadata!
+emitter.emit_mce(lineage_mce)
+```
+
+We're using the `MetadataChangeEvent` emitter to change entities in this example.
+For more information about the `MetadataChangeEvent`, please refer to [Metadata Change Event (MCE)](/docs/what/mxe.md#metadata-change-event-mce)
+
+## Expected Outcomes
+
+You can now see the lineage between `fct_users_deleted` and `logging_events`.
+
+![lineage-added](../../imgs/apis/tutorials/lineage-added.png)
diff --git a/docs/api/tutorials/adding-ownerships.md b/docs/api/tutorials/adding-ownerships.md
new file mode 100644
index 0000000000000..562ff50237aaa
--- /dev/null
+++ b/docs/api/tutorials/adding-ownerships.md
@@ -0,0 +1,158 @@
+# Adding Owners On Datasets/Columns
+
+## Why Would You Add Owners?
+
+Assigning an owner to an entity helps to establish accountability for the metadata and collaborating as a team.
+If there are any issues or questions about the data, the designated owner can serve as a reliable point of contact.
+
+### Goal Of This Guide
+
+This guide will show you how to add user group `bfoo` as an owner to the `fct_users_created` dataset.
+
+## Pre-requisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed information, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding owners, you need to ensure the targeted dataset and the owner are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+:::
+
+## Add Owners With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```python
+mutation addOwners {
+ addOwner(
+ input: {
+ ownerUrn: "urn:li:corpGroup:bfoo",
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ ownerEntityType: CORP_GROUP,
+ type: TECHNICAL_OWNER
+ }
+ )
+}
+```
+
+Expected Response:
+
+```python
+{
+ "data": {
+ "addOwner": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation addOwners { addOwner(input: { ownerUrn: \"urn:li:corpGroup:bfoo\", resourceUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\", ownerEntityType: CORP_GROUP, type: TECHNICAL_OWNER }) }", "variables":{}}'
+```
+
+## Add Ownerships With Python SDK
+
+The following code adds an owner named `bfoo` to a hive dataset named `fct_users_created`.
+You can refer to the full code in [dataset_add_owner.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/dataset_add_owner.py).
+
+```python
+# inlined from metadata-ingestion/examples/library/dataset_add_column_ownership.py
+import logging
+from typing import Optional
+
+from datahub.emitter.mce_builder import make_dataset_urn, make_user_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+
+# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough)
+from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import (
+ OwnerClass,
+ OwnershipClass,
+ OwnershipTypeClass,
+)
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+# Inputs -> owner, ownership_type, dataset
+owner_to_add = make_user_urn("bfoo")
+ownership_type = OwnershipTypeClass.TECHNICAL_OWNER
+dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
+
+# Some objects to help with conditional pathways later
+owner_class_to_add = OwnerClass(owner=owner_to_add, type=ownership_type)
+ownership_to_add = OwnershipClass(owners=[owner_class_to_add])
+
+
+# First we get the current owners
+gms_endpoint = "http://localhost:8080"
+graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))
+
+
+current_owners: Optional[OwnershipClass] = graph.get_aspect(
+ entity_urn=dataset_urn, aspect_type=OwnershipClass
+)
+
+
+need_write = False
+if current_owners:
+ if (owner_to_add, ownership_type) not in [
+ (x.owner, x.type) for x in current_owners.owners
+ ]:
+ # owners exist, but this owner is not present in the current owners
+ current_owners.owners.append(owner_class_to_add)
+ need_write = True
+else:
+ # create a brand new ownership aspect
+ current_owners = ownership_to_add
+ need_write = True
+
+if need_write:
+ event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn,
+ aspect=current_owners,
+ )
+ graph.emit(event)
+ log.info(
+ f"Owner {owner_to_add}, type {ownership_type} added to dataset {dataset_urn}"
+ )
+
+else:
+ log.info(f"Owner {owner_to_add} already exists, omitting write")
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+Expected Response:
+
+```json
+{ "data": { "addOwner": true }, "extensions": {} }
+```
+
+## Expected Outcomes
+
+You can now see `bfoo` has been added as an owner to the `fct_users_created` dataset.
+
+![ownership-added](../../imgs/apis/tutorials/owner-added.png)
diff --git a/docs/api/tutorials/adding-tags.md b/docs/api/tutorials/adding-tags.md
new file mode 100644
index 0000000000000..c03d9e8a19aae
--- /dev/null
+++ b/docs/api/tutorials/adding-tags.md
@@ -0,0 +1,210 @@
+# Adding Tags On Datasets/Columns
+
+## Why Would You Add Tags?
+
+Tags are informal, loosely controlled labels that help in search & discovery. They can be added to datasets, dataset schemas, or containers, for an easy way to label or categorize entities – without having to associate them to a broader business glossary or vocabulary.
+
+For more information about tags, refer to [About DataHub Tags](/docs/tags.md).
+
+### Goal Of This Guide
+
+This guide will show you how to add a `CustomerAccount` tag to the `user_name` column of a dataset called `fct_users_created`.
+Additionally, we will cover how to add a tag to the dataset itself.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding tags, you need to ensure the targeted dataset and the tag are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+If you want to know how to create tags using APIs & SDKs, please refer to [Creating Tags](/docs/api/tutorials/creating-tags.md) and [Creating Datasets](/docs/api/tutorials/creating-datasets.md).
+
+:::
+
+## Add Tags With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+mutation addTags {
+ addTags(
+ input: {
+ tagUrns: ["urn:li:tag:Legacy"],
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ subResourceType:DATASET_FIELD,
+ subResource:"user_name"})
+}
+```
+
+Note that you can also add a tag on a dataset if you don't specify `subResourceType` and `subResource`.
+
+```json
+mutation addTags {
+ addTags(
+ input: {
+ tagUrns: ["urn:li:tag:Legacy"],
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ }
+ )
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "addTags": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation addTags { addTags(input: { tagUrns: [\"urn:li:tag:Legacy\"], resourceUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "addTags": true }, "extensions": {} }
+```
+
+## Add Tags With Python SDK
+
+The following code adds a tag named `Legacy` to the column `user_name` of the hive dataset `fct_users_created`.
+You can refer to the full code in [dataset_add_column_tag.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/dataset_add_column_tag.py).
+
+```python
+# inlined from metadata-ingestion/examples/library/dataset_add_column_tag.py
+import logging
+import time
+
+from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+
+# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough)
+from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import (
+ AuditStampClass,
+ EditableSchemaFieldInfoClass,
+ EditableSchemaMetadataClass,
+ GlobalTagsClass,
+ TagAssociationClass,
+)
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def get_simple_field_path_from_v2_field_path(field_path: str) -> str:
+ """A helper function to extract simple . path notation from the v2 field path"""
+ if not field_path.startswith("[version=2.0]"):
+ # not a v2, we assume this is a simple path
+ return field_path
+ # this is a v2 field path
+ tokens = [
+ t for t in field_path.split(".") if not (t.startswith("[") or t.endswith("]"))
+ ]
+
+ return ".".join(tokens)
+
+
+# Inputs -> the column, dataset and the tag to set
+column = "user_name"
+dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
+tag_to_add = make_tag_urn("Legacy")
+
+
+# First we get the current editable schema metadata
+gms_endpoint = "http://localhost:8080"
+graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))
+
+
+current_editable_schema_metadata = graph.get_aspect(
+ entity_urn=dataset_urn,
+ aspect_type=EditableSchemaMetadataClass,
+)
+
+
+# Some pre-built objects to help all the conditional pathways
+tag_association_to_add = TagAssociationClass(tag=tag_to_add)
+tags_aspect_to_set = GlobalTagsClass(tags=[tag_association_to_add])
+field_info_to_set = EditableSchemaFieldInfoClass(
+ fieldPath=column, globalTags=tags_aspect_to_set
+)
+
+
+need_write = False
+field_match = False
+if current_editable_schema_metadata:
+ for fieldInfo in current_editable_schema_metadata.editableSchemaFieldInfo:
+ if get_simple_field_path_from_v2_field_path(fieldInfo.fieldPath) == column:
+ # we have some editable schema metadata for this field
+ field_match = True
+ if fieldInfo.globalTags:
+ if tag_to_add not in [x.tag for x in fieldInfo.globalTags.tags]:
+ # this tag is not present
+ fieldInfo.globalTags.tags.append(tag_association_to_add)
+ need_write = True
+ else:
+ fieldInfo.globalTags = tags_aspect_to_set
+ need_write = True
+
+ if not field_match:
+ # this field isn't present in the editable schema metadata aspect, add it
+ field_info = field_info_to_set
+ current_editable_schema_metadata.editableSchemaFieldInfo.append(field_info)
+ need_write = True
+
+else:
+ # create a brand new editable schema metadata aspect
+ now = int(time.time() * 1000) # milliseconds since epoch
+ current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion")
+ current_editable_schema_metadata = EditableSchemaMetadataClass(
+ editableSchemaFieldInfo=[field_info_to_set],
+ created=current_timestamp,
+ )
+ need_write = True
+
+if need_write:
+ event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn,
+ aspect=current_editable_schema_metadata,
+ )
+ graph.emit(event)
+ log.info(f"Tag {tag_to_add} added to column {column} of dataset {dataset_urn}")
+
+else:
+ log.info(f"Tag {tag_to_add} already attached to column {column}, omitting write")
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see `CustomerAccount` tag has been added to `user_name` column.
+
+![tag-added](../../imgs/apis/tutorials/tag-added.png)
diff --git a/docs/api/tutorials/adding-terms.md b/docs/api/tutorials/adding-terms.md
new file mode 100644
index 0000000000000..7415dfbe36da2
--- /dev/null
+++ b/docs/api/tutorials/adding-terms.md
@@ -0,0 +1,210 @@
+# Adding Terms On Datasets/Columns
+
+## Why Would You Add Terms?
+
+The Business Glossary (Term) feature in DataHub helps you use a shared vocabulary within the organization, by providing a framework for defining a standardized set of data concepts and then associating them with the physical assets that exist within your data ecosystem.
+
+For more information about terms, refer to [About DataHub Business Glossary](/docs/glossary/business-glossary.md).
+
+### Goal Of This Guide
+
+This guide will show you how to add a `CustomerAccount` term to `user_name` column of a dataset named `fct_users_created`.
+Also, we will cover how to add a term to a dataset itself.
+
+## Pre-requisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed information, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding terms, you need to ensure the targeted dataset and the term are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+If you want to know how to create entities using APIs & SDKs, please refer to [Creating Terms](/docs/api/tutorials/creating-terms.md) and [Creating Datasets](/docs/api/tutorials/creating-datasets.md).
+:::
+
+## Add Terms With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```python
+mutation addTerms {
+ addTerms(
+ input: {
+ termUrns: ["urn:li:glossaryTerm:CustomerAccount"],
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ subResourceType:DATASET_FIELD,
+ subResource:"user_name"})
+}
+```
+
+Note that you can also add a term on a dataset if you don't specify `subResourceType` and `subResource`.
+
+```json
+mutation addTerms {
+ addTerms(
+ input: {
+ termUrns: ["urn:li:glossaryTerm:CustomerAccount"],
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ }
+ )
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "addTerms": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation addTerm { addTerms(input: { termUrns: [\"urn:li:glossaryTerm:CustomerAccount\"], resourceUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "addTerms": true }, "extensions": {} }
+```
+
+## Add Terms With Python SDK
+
+The following code adds a glossary term named `CustomerAccount` to the column `user_name` of the hive dataset `fct_users_created`.
+You can refer to the full code in [dataset_add_column_term.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/dataset_add_column_term.py).
+
+```python
+# inlined from metadata-ingestion/examples/library/dataset_add_column_term.py
+import logging
+import time
+
+from datahub.emitter.mce_builder import make_dataset_urn, make_term_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+
+# read-modify-write requires access to the DataHubGraph (RestEmitter is not enough)
+from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import (
+ AuditStampClass,
+ EditableSchemaFieldInfoClass,
+ EditableSchemaMetadataClass,
+ GlossaryTermAssociationClass,
+ GlossaryTermsClass,
+)
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+
+def get_simple_field_path_from_v2_field_path(field_path: str) -> str:
+ """A helper function to extract simple . path notation from the v2 field path"""
+ if not field_path.startswith("[version=2.0]"):
+ # not a v2, we assume this is a simple path
+ return field_path
+ # this is a v2 field path
+ tokens = [
+ t for t in field_path.split(".") if not (t.startswith("[") or t.endswith("]"))
+ ]
+
+ return ".".join(tokens)
+
+
+# Inputs -> the column, dataset and the term to set
+column = "user_name"
+dataset_urn = make_dataset_urn(platform="hive", name="fct_users_created", env="PROD")
+term_to_add = make_term_urn("User")
+
+
+# First we get the current editable schema metadata
+gms_endpoint = "http://localhost:8080"
+graph = DataHubGraph(DatahubClientConfig(server=gms_endpoint))
+
+
+current_editable_schema_metadata = graph.get_aspect(
+ entity_urn=dataset_urn, aspect_type=EditableSchemaMetadataClass
+)
+
+
+# Some pre-built objects to help all the conditional pathways
+now = int(time.time() * 1000) # milliseconds since epoch
+current_timestamp = AuditStampClass(time=now, actor="urn:li:corpuser:ingestion")
+
+term_association_to_add = GlossaryTermAssociationClass(urn=term_to_add)
+term_aspect_to_set = GlossaryTermsClass(
+ terms=[term_association_to_add], auditStamp=current_timestamp
+)
+field_info_to_set = EditableSchemaFieldInfoClass(
+ fieldPath=column, glossaryTerms=term_aspect_to_set
+)
+
+need_write = False
+field_match = False
+if current_editable_schema_metadata:
+ for fieldInfo in current_editable_schema_metadata.editableSchemaFieldInfo:
+ if get_simple_field_path_from_v2_field_path(fieldInfo.fieldPath) == column:
+ # we have some editable schema metadata for this field
+ field_match = True
+ if fieldInfo.glossaryTerms:
+ if term_to_add not in [x.urn for x in fieldInfo.glossaryTerms.terms]:
+ # this term is not present
+ fieldInfo.glossaryTerms.terms.append(term_association_to_add)
+ need_write = True
+ else:
+ fieldInfo.glossaryTerms = term_aspect_to_set
+ need_write = True
+
+ if not field_match:
+ # this field isn't present in the editable schema metadata aspect, add it
+ field_info = field_info_to_set
+ current_editable_schema_metadata.editableSchemaFieldInfo.append(field_info)
+ need_write = True
+
+else:
+ # create a brand new editable schema metadata aspect
+ current_editable_schema_metadata = EditableSchemaMetadataClass(
+ editableSchemaFieldInfo=[field_info_to_set],
+ created=current_timestamp,
+ )
+ need_write = True
+
+if need_write:
+ event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=dataset_urn,
+ aspect=current_editable_schema_metadata,
+ )
+ graph.emit(event)
+ log.info(f"Term {term_to_add} added to column {column} of dataset {dataset_urn}")
+
+else:
+ log.info(f"Term {term_to_add} already attached to column {column}, omitting write")
+
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see the term `CustomerAccount` has been added to `user_name` column.
+![term-added](../../imgs/apis/tutorials/term-created.png)
diff --git a/docs/api/tutorials/creating-datasets.md b/docs/api/tutorials/creating-datasets.md
new file mode 100644
index 0000000000000..e485cb4696eea
--- /dev/null
+++ b/docs/api/tutorials/creating-datasets.md
@@ -0,0 +1,113 @@
+# Creating Datasets
+
+## Why Would You Create Datasets?
+
+The dataset entity is one the most important entities in the metadata model. They represent collections of data that are typically represented as Tables or Views in a database (e.g. BigQuery, Snowflake, Redshift etc.), Streams in a stream-processing environment (Kafka, Pulsar etc.), bundles of data found as Files or Folders in data lake systems (S3, ADLS, etc.).
+For more information about datasets, refer to [Dataset](/docs/generated/metamodel/entities/dataset.md).
+
+### Goal Of This Guide
+
+This guide will show you how to create a dataset named `realestate_db.sales` with three columns.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+## Create Datasets With GraphQL (Not Supported)
+
+> 🚫 Creating a dataset via GraphQL is currently not supported.
+> Please check out [API feature comparison table](/docs/api/datahub-apis.md#datahub-api-comparison) for more information.
+
+## Create Datasets With Python SDK
+
+The following code creates a hive dataset named `realestate_db.sales` with three fields.
+You can refer to the complete code in [dataset_schema.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/dataset_schema.py).
+
+```python
+# inlined from metadata-ingestion/examples/library/dataset_schema.py
+# Imports for urn construction utility methods
+from datahub.emitter.mce_builder import make_data_platform_urn, make_dataset_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import (
+ AuditStampClass,
+ DateTypeClass,
+ OtherSchemaClass,
+ SchemaFieldClass,
+ SchemaFieldDataTypeClass,
+ SchemaMetadataClass,
+ StringTypeClass,
+)
+
+event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=make_dataset_urn(platform="hive", name="realestate_db.sales", env="PROD"),
+ aspect=SchemaMetadataClass(
+ schemaName="customer", # not used
+ platform=make_data_platform_urn("hive"), # important <- platform must be an urn
+ version=0, # when the source system has a notion of versioning of schemas, insert this in, otherwise leave as 0
+ hash="", # when the source system has a notion of unique schemas identified via hash, include a hash, else leave it as empty string
+ platformSchema=OtherSchemaClass(rawSchema="__insert raw schema here__"),
+ lastModified=AuditStampClass(
+ time=1640692800000, actor="urn:li:corpuser:ingestion"
+ ),
+ fields=[
+ SchemaFieldClass(
+ fieldPath="address.zipcode",
+ type=SchemaFieldDataTypeClass(type=StringTypeClass()),
+ nativeDataType="VARCHAR(50)", # use this to provide the type of the field in the source system's vernacular
+ description="This is the zipcode of the address. Specified using extended form and limited to addresses in the United States",
+ lastModified=AuditStampClass(
+ time=1640692800000, actor="urn:li:corpuser:ingestion"
+ ),
+ ),
+ SchemaFieldClass(
+ fieldPath="address.street",
+ type=SchemaFieldDataTypeClass(type=StringTypeClass()),
+ nativeDataType="VARCHAR(100)",
+ description="Street corresponding to the address",
+ lastModified=AuditStampClass(
+ time=1640692800000, actor="urn:li:corpuser:ingestion"
+ ),
+ ),
+ SchemaFieldClass(
+ fieldPath="last_sold_date",
+ type=SchemaFieldDataTypeClass(type=DateTypeClass()),
+ nativeDataType="Date",
+ description="Date of the last sale date for this property",
+ created=AuditStampClass(
+ time=1640692800000, actor="urn:li:corpuser:ingestion"
+ ),
+ lastModified=AuditStampClass(
+ time=1640692800000, actor="urn:li:corpuser:ingestion"
+ ),
+ ),
+ ],
+ ),
+)
+
+# Create rest emitter
+rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
+rest_emitter.emit(event)
+
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see `realestate_db.sales` dataset has been created.
+
+![dataset-created](../../imgs/apis/tutorials/dataset-created.png)
+
+## What's Next?
+
+Now that you created a dataset, how about enriching it? Here are some guides that you can check out.
+
+- [how to add a tag on a dataset](/docs/api/tutorials/adding-tags.md).
+- [how to add a term on a dataset](/docs/api/tutorials/adding-terms.md).
+- [how to add owner on a dataset](/docs/api/tutorials/adding-ownerships.md).
+- [how to add lineage on a dataset](/docs/api/tutorials/adding-lineage.md).
diff --git a/docs/api/tutorials/creating-domain.md b/docs/api/tutorials/creating-domain.md
new file mode 100644
index 0000000000000..d963730c28fec
--- /dev/null
+++ b/docs/api/tutorials/creating-domain.md
@@ -0,0 +1,85 @@
+# Creating Domains
+
+## Why Would You Create Domains?
+
+Domains are curated, top-level folders or categories where related assets can be explicitly grouped. Management of Domains can be centralized, or distributed out to Domain owners. Currently, an asset can belong to only one Domain at a time.
+For more information about domains, refer to [About DataHub Domains](/docs/domains.md).
+
+### Goal Of This Guide
+
+This guide will show you how to create a domain named `Marketing`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+## Create Domain With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+mutation createDomain {
+ createDomain(input: { name: "Marketing", description: "Entities related to the marketing department" })
+}
+```
+
+If you see the following response, the operation was successful:
+
+```json
+{
+ "data": {
+ "createDomain": ""
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation createDomain { createDomain(input: { name: \"Marketing\", description: \"Entities related to the marketing department.\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "createDomain": "" }, "extensions": {} }
+```
+
+## Create a Domain With Python SDK
+
+The following code creates a domain named `Marketing`.
+
+```python
+{{ inline /metadata-ingestion/examples/library/create_domain.py show_path_as_comment }}
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see `Marketing` domain has been created under `Govern > Domains`.
+
+![domain-created](../../imgs/apis/tutorials/domain-created.png)
+
+## What's Next?
+
+Now that you created a domain, how about enriching it? Here is a guide that you can check out.
+
+- [how to add a dataset to a domain](/docs/api/tutorials/adding-domain.md).
diff --git a/docs/api/tutorials/creating-tags.md b/docs/api/tutorials/creating-tags.md
new file mode 100644
index 0000000000000..e468f368c02c5
--- /dev/null
+++ b/docs/api/tutorials/creating-tags.md
@@ -0,0 +1,115 @@
+# Creating Tags
+
+## Why Would You Create Tags?
+
+Tags are informal, loosely controlled labels that help in search & discovery. They can be added to datasets, dataset schemas, or containers, for an easy way to label or categorize entities – without having to associate them to a broader business glossary or vocabulary.
+
+For more information about tags, refer to [About DataHub Tags](/docs/tags.md).
+
+### Goal Of This Guide
+
+This guide will show you how to create a tag named `Deprecated`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+## Create Tags With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```python
+mutation createTag {
+ createTag(input:
+ {
+ name: "Deprecated",
+ description: "Having this tag means this column or table is deprecated."
+ })
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "createTag": ""
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation createTag { createTag(input: { name: \"Deprecated\", description: \"Having this tag means this column or table is deprecated.\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "createTag": "" }, "extensions": {} }
+```
+
+## Create Tags With Python SDK
+
+The following code creates a tag named `Deprecated`.
+You can refer to the full code in [create_tag.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/create_tag.py).
+
+```python
+import logging
+
+from datahub.emitter.mce_builder import make_tag_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import TagPropertiesClass
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+tag_urn = make_tag_urn("deprecated")
+tag_properties_aspect = TagPropertiesClass(
+ name="Deprecated",
+ description="Having this tag means this column or table is deprecated.",
+)
+
+event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=tag_urn,
+ aspect=tag_properties_aspect,
+)
+
+# Create rest emitter
+rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
+rest_emitter.emit(event)
+log.info(f"Created tag {tag_urn}")
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see `Deprecated` tag has been created.
+
+![tag-created](../../imgs/apis/tutorials/tag-created.png)
+
+## What's Next?
+
+Now that you created a tag, how about adding it to a dataset? Here's a guide on [how to add a tag on a dataset](/docs/api/tutorials/adding-tags.md).
diff --git a/docs/api/tutorials/creating-terms.md b/docs/api/tutorials/creating-terms.md
new file mode 100644
index 0000000000000..63715c3af35ee
--- /dev/null
+++ b/docs/api/tutorials/creating-terms.md
@@ -0,0 +1,117 @@
+# Creating Terms
+
+## Why Would You Create Terms?
+
+The Business Glossary (Term) feature in DataHub helps you use a shared vocabulary within the organization, by providing a framework for defining a standardized set of data concepts and then associating them with the physical assets that exist within your data ecosystem.
+
+For more information about terms, refer to [About DataHub Business Glossary](/docs/glossary/business-glossary.md).
+
+### Goal Of This Guide
+
+This guide will show you how to create a term named `Rate of Return`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+## Create Terms With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```python
+mutation createGlossaryTerm {
+ createGlossaryTerm(input:
+ {
+ name: "Rate of Return",
+ description: "A rate of return (RoR) is the net gain or loss of an investment over a specified time period."
+ })
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "createGlossaryTerm": ""
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation createGlossaryTerm { createGlossaryTerm(input: { name: \"Rate of Return\", description: \"A rate of return (RoR) is the net gain or loss of an investment over a specified time period.\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "createGlossaryTerm": "" }, "extensions": {} }
+```
+
+## Create Terms With Python SDK
+
+The following code creates a term named `Rate of Return`.
+You can refer to the full code in [create_term.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/create_term.py).
+
+```python
+import logging
+
+from datahub.emitter.mce_builder import make_term_urn
+from datahub.emitter.mcp import MetadataChangeProposalWrapper
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+
+# Imports for metadata model classes
+from datahub.metadata.schema_classes import GlossaryTermInfoClass
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+term_urn = make_term_urn("rateofreturn")
+term_properties_aspect = GlossaryTermInfoClass(
+ definition="A rate of return (RoR) is the net gain or loss of an investment over a specified time period.",
+ name="Rate of Return",
+ termSource="",
+)
+
+event: MetadataChangeProposalWrapper = MetadataChangeProposalWrapper(
+ entityUrn=term_urn,
+ aspect=term_properties_aspect,
+)
+
+# Create rest emitter
+rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
+rest_emitter.emit(event)
+log.info(f"Created term {term_urn}")
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see `Rate of Return` term has been created.
+To view the definition, you can either click on 'Govern > Glossary' at the top right of the page or simply search for the term by name.
+
+![term-created](../../imgs/apis/tutorials/term-created.png)
+
+## What's Next?
+
+Now that you created a term, how about adding it to a dataset? Here's a guide on [how to add a term on a dataset](/docs/api/tutorials/adding-terms.md).
diff --git a/docs/api/tutorials/creating-users-and-groups.md b/docs/api/tutorials/creating-users-and-groups.md
new file mode 100644
index 0000000000000..837598f2ac560
--- /dev/null
+++ b/docs/api/tutorials/creating-users-and-groups.md
@@ -0,0 +1,188 @@
+# Creating or Updating Users And Groups
+
+## Why Would You Create or Update Users and Groups?
+
+Users and groups are essential for managing ownership of data.
+By creating or updating user accounts and assigning them to appropriate groups, administrators can ensure that the right people can access the data they need to do their jobs.
+This helps to avoid confusion or conflicts over who is responsible for specific datasets and can improve the overall effectiveness.
+
+### Goal Of This Guide
+
+This guide will show you how to create or update users and groups.
+
+## Pre-requisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed information, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+In this guide, ingesting sample data is optional.
+:::
+
+## Upsert Users And Groups With Datahub CLI
+
+You can ingest users and groups with `yaml` using Datahub CLI.
+
+### Upsert User
+
+Save this `user.yaml` as a local file.
+
+```yaml
+- id: bar@acryl.io
+ first_name: The
+ last_name: Bar
+ email: bar@acryl.io
+ slack: "@the_bar_raiser"
+ description: "I like raising the bar higher"
+ groups:
+ - foogroup@acryl.io
+- id: datahub
+ slack: "@datahubproject"
+ phone: "1-800-GOT-META"
+ description: "The DataHub Project"
+ picture_link: "https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/datahub-logo-color-stable.svg"
+```
+
+Execute the following CLI command to ingest user data.
+Since the user datahub already exists in the sample data, any updates made to the user information will overwrite the existing data.
+
+```
+datahub user upsert -f user.yaml
+```
+
+If you see the following logs, the operation was successful:
+
+```shell
+Update succeeded for urn urn:li:corpuser:bar@acryl.io.
+Update succeeded for urn urn:li:corpuser:datahub.
+```
+
+### Upsert Group
+
+Save this `group.yaml` as a local file. Note that the group includes a list of users who are admins (these will be marked as owners) and members.
+Within these lists, you can refer to the users by their ids or their urns, and can additionally specify their metadata inline within the group description itself. See the example below to understand how this works and feel free to make modifications to this file locally to see the effects of your changes in your local DataHub instance.
+
+```yaml
+id: foogroup@acryl.io
+display_name: Foo Group
+admins:
+ - datahub
+members:
+ - bar@acryl.io # refer to a user either by id or by urn
+ - id: joe@acryl.io # inline specification of user
+ slack: "@joe_shmoe"
+ display_name: "Joe's Hub"
+```
+
+Execute the following CLI command to ingest this group's information.
+
+```
+datahub group upsert -f group.yaml
+```
+
+If you see the following logs, the operation was successful:
+
+```shell
+Update succeeded for group urn:li:corpGroup:foogroup@acryl.io.
+```
+
+## Upsert Users And Groups With Python SDK
+
+### Upsert User
+
+The following code creates a user named `The Bar` with urn `urn:li:corpuser:bar@acryl.io`.
+You can refer to the full code in [upsert_user.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/upsert_user.py).
+
+```python
+import logging
+
+from datahub.api.entities.corpuser.corpuser import CorpUser, CorpUserGenerationConfig
+from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+user_email = "bar@acryl.io"
+
+user: CorpUser = CorpUser(
+ id=user_email,
+ display_name="The Bar",
+ email=user_email,
+ title="Software Engineer",
+ first_name="The",
+ last_name="Bar",
+ full_name="The Bar",
+)
+
+# Create graph client
+datahub_graph = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080"))
+for event in user.generate_mcp(
+ generation_config=CorpUserGenerationConfig(override_editable=False)
+):
+ datahub_graph.emit(event)
+log.info(f"Upserted user {user.urn}")
+```
+
+### Upsert Group
+
+The following code creates a group called `Foo Group` with group `urn:li:corpgroup:foogroup@acryl.io`.
+You can refer to the full code in [upsert_group.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/upsert_group.py).
+
+```python
+import logging
+
+from datahub.api.entities.corpgroup.corpgroup import (
+ CorpGroup,
+ CorpGroupGenerationConfig,
+)
+from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig
+from datahub.utilities.urns.corpuser_urn import CorpuserUrn
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+group_email = "foogroup@acryl.io"
+group = CorpGroup(
+ id=group_email,
+ admins=[str(CorpuserUrn.create_from_id("datahub"))],
+ members=[
+ str(CorpuserUrn.create_from_id("bar@acryl.io")),
+ str(CorpuserUrn.create_from_id("joe@acryl.io")),
+ ],
+ groups=[],
+ display_name="Foo Group",
+ email=group_email,
+ description="Software engineering team",
+ slack="@foogroup",
+)
+
+# Create graph client
+datahub_graph = DataHubGraph(DataHubGraphConfig(server="http://localhost:8080"))
+
+for event in group.generate_mcp(
+ generation_config=CorpGroupGenerationConfig(
+ override_editable=False, datahub_graph=datahub_graph
+ )
+):
+ datahub_graph.emit(event)
+log.info(f"Upserted group {group.urn}")
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+### User
+
+You can see the user `The Bar` has been created and the user `datahub` has been updated under `Settings > Access > Users & Groups`
+![user-upserted](../../imgs/apis/tutorials/user-upserted.png)
+
+### Group
+
+You can see the group `Foo Group` has been created under `Settings > Access > Users & Groups`
+![group-upserted](../../imgs/apis/tutorials/group-upserted.png)
+
+## What's Next?
+
+Now that you created users and groups, how about adding them as an owner to a dataset? Here's a guide on [how to add an owner on a dataset](/docs/api/tutorials/adding-ownerships.md).
diff --git a/docs/api/tutorials/deleting-entities-by-urn.md b/docs/api/tutorials/deleting-entities-by-urn.md
new file mode 100644
index 0000000000000..1666f43bca521
--- /dev/null
+++ b/docs/api/tutorials/deleting-entities-by-urn.md
@@ -0,0 +1,100 @@
+# Deleting Entities By Urn
+
+## Why Would You Delete Entities?
+
+You may want to delete a dataset if it is no longer needed, contains incorrect or sensitive information, or if it was created for testing purposes and is no longer necessary in production.
+It is possible to [delete entities via CLI](/docs/how/delete-metadata.md), but a programmatic approach is necessary for scalability.
+
+There are two methods of deletion: soft delete and hard delete.
+**Soft delete** sets the Status aspect of the entity to Removed, which hides the entity and all its aspects from being returned by the UI.
+**Hard delete** physically deletes all rows for all aspects of the entity.
+
+For more information about soft delete and hard delete, please refer to [Removing Metadata from DataHub](/docs/how/delete-metadata.md#delete-by-urn).
+
+### Goal Of This Guide
+
+This guide will show you how to delete a dataset named `fct_user_deleted`.
+However, you can delete other entities like tags, terms, and owners with the same approach.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+## Delete Datasets With GraphQL
+
+> 🚫 Hard delete with GraphQL is currently not supported.
+> Please check out [API feature comparison table](/docs/api/datahub-apis.md#datahub-api-comparison) for more information.
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+mutation batchUpdateSoftDeleted {
+ batchUpdateSoftDeleted(input:
+ { urns: ["urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)"],
+ deleted: true })
+}
+```
+
+If you see the following response, the operation was successful:
+
+```json
+{
+ "data": {
+ "batchUpdateSoftDeleted": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation batchUpdateSoftDeleted { batchUpdateSoftDeleted(input: { deleted: true, urns: [\"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_deleted,PROD)\"] }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "batchUpdateSoftDeleted": true }, "extensions": {} }
+```
+
+## Delete Datasets With Python SDK
+
+The following code deletes a hive dataset named `fct_users_deleted`.
+You can refer to the complete code in [delete_dataset.py](https://github.com/datahub-project/datahub/blob/master/metadata-ingestion/examples/library/delete_dataset.py).
+
+```python
+import logging
+from datahub.cli import delete_cli
+from datahub.emitter.rest_emitter import DatahubRestEmitter
+from datahub.emitter.mce_builder import make_dataset_urn
+
+log = logging.getLogger(__name__)
+logging.basicConfig(level=logging.INFO)
+
+rest_emitter = DatahubRestEmitter(gms_server="http://localhost:8080")
+dataset_urn = make_dataset_urn(name="fct_users_deleted", platform="hive")
+
+delete_cli._delete_one_urn(urn=dataset_urn, soft=True, cached_emitter=rest_emitter)
+
+log.info(f"Deleted dataset {dataset_urn}")
+```
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+The dataset `fct_users_deleted` has now been deleted, so if you search for a hive dataset named `fct_users_deleted`, you will no longer be able to see it.
+
+![dataset-deleted](../../imgs/apis/tutorials/dataset-deleted.png)
diff --git a/docs/api/tutorials/reading-deprecation.md b/docs/api/tutorials/reading-deprecation.md
new file mode 100644
index 0000000000000..6f8a1f74b9a73
--- /dev/null
+++ b/docs/api/tutorials/reading-deprecation.md
@@ -0,0 +1,94 @@
+# Reading Deprecation On Datasets
+
+## Why Would You Read Deprecation?
+
+Deprecation indicates the status of an entity. For datasets, keeping the deprecation status up-to-date is important to inform users and downstream systems of changes to the dataset's availability or reliability. By updating the status, you can prevent issues and ensure users have access to the most reliable data.
+
+### Goal Of This Guide
+
+This guide will show you how to read the deprecation status of dataset `fct_users_created`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before reading the deprecation status, you need to ensure the targeted dataset is already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from sample ingestion.
+If you want to know how to create datasets using APIs & SDKs, please refer to [Creating Datasets](/docs/api/tutorials/creating-datasets.md).
+:::
+
+## Read Deprecation With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+query {
+ dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") {
+ deprecation {
+ deprecated
+ decommissionTime
+ }
+ }
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "dataset": {
+ "deprecation": {
+ "deprecated": false,
+ "decommissionTime": null
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "{ dataset(urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\") { deprecation { deprecated decommissionTime } } }", "variables":{} }'
+```
+
+Expected Response:
+
+```json
+{
+ "data": {
+ "dataset": {
+ "deprecation": { "deprecated": false, "decommissionTime": null }
+ }
+ },
+ "extensions": {}
+}
+```
+
+## Read Deprecation With Python SDK
+
+The following code reads the deprecation status of dataset `fct_users_created`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
diff --git a/docs/api/tutorials/reading-domains.md b/docs/api/tutorials/reading-domains.md
new file mode 100644
index 0000000000000..cec98f5c7c565
--- /dev/null
+++ b/docs/api/tutorials/reading-domains.md
@@ -0,0 +1,113 @@
+# Reading Domains On Datasets/Columns
+
+## Why Would You Read Domains?
+
+Domains are curated, top-level folders or categories where related assets can be explicitly grouped. Management of Domains can be centralized, or distributed out to Domain owners. Currently, an asset can belong to only one Domain at a time.
+For more information about domains, refer to [About DataHub Domains](/docs/domains.md).
+
+### Goal Of This Guide
+
+This guide will show you how to read domains attached to a dataset `fct_users_created`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before reading domains, you need to ensure the targeted dataset and the domain are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+
+Specifically, we will assume that the domain `Marketing` is attached to the dataset `fct_users_created`.
+To learn how to add datasets to a domain, please refer to our documentation on [Adding Domain](/docs/api/tutorials/adding-domain.md).
+:::
+
+## Read Domains With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+query {
+ dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") {
+ domain {
+ associatedUrn
+ domain {
+ urn
+ properties {
+ name
+ }
+ }
+ }
+ }
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "dataset": {
+ "domain": {
+ "associatedUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ "domain": {
+ "urn": "urn:li:domain:71b3bf7b-2e3f-4686-bfe1-93172c8c4e10",
+ "properties": {
+ "name": "Marketing"
+ }
+ }
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "{ dataset(urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\") { domain { associatedUrn domain { urn properties { name } } } } }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{
+ "data": {
+ "dataset": {
+ "domain": {
+ "associatedUrn": "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ "domain": {
+ "urn": "urn:li:domain:71b3bf7b-2e3f-4686-bfe1-93172c8c4e10",
+ "properties": { "name": "Marketing" }
+ }
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+## Read Domains With Python SDK
+
+The following code reads domains attached to a dataset `fct_users_created`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
diff --git a/docs/api/tutorials/reading-owners.md b/docs/api/tutorials/reading-owners.md
new file mode 100644
index 0000000000000..b5ca8acf87ac2
--- /dev/null
+++ b/docs/api/tutorials/reading-owners.md
@@ -0,0 +1,121 @@
+# Reading Owners On Datasets/Columns
+
+## Why Would You Read Owners?
+
+Assigning an owner to an entity helps to establish accountability for the metadata and collaborating as a team.
+If there are any issues or questions about the data, the designated owner can serve as a reliable point of contact.
+
+### Goal Of This Guide
+
+This guide will show you how to read owners attached to a dataset `SampleHiveDataset`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before reading owners, you need to ensure the targeted dataset and the owner are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+:::
+
+## Read Owners With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+query {
+ dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)") {
+ ownership {
+ owners {
+ owner {
+ ... on CorpUser {
+ urn
+ type
+ }
+ ... on CorpGroup {
+ urn
+ type
+ }
+ }
+ }
+ }
+ }
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "dataset": {
+ "ownership": {
+ "owners": [
+ {
+ "owner": {
+ "urn": "urn:li:corpuser:jdoe",
+ "type": "CORP_USER"
+ }
+ },
+ {
+ "owner": {
+ "urn": "urn:li:corpuser:datahub",
+ "type": "CORP_USER"
+ }
+ }
+ ]
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "{ dataset(urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)\") { ownership { owners { owner { ... on CorpUser { urn type } ... on CorpGroup { urn type } } } } } }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{
+ "data": {
+ "dataset": {
+ "ownership": {
+ "owners": [
+ { "owner": { "urn": "urn:li:corpuser:jdoe", "type": "CORP_USER" } },
+ { "owner": { "urn": "urn:li:corpuser:datahub", "type": "CORP_USER" } }
+ ]
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+## Read Owners With Python SDK
+
+The following code reads owners attached to a dataset `SampleHiveDataset`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
diff --git a/docs/api/tutorials/reading-tags.md b/docs/api/tutorials/reading-tags.md
new file mode 100644
index 0000000000000..79be5ff805ec7
--- /dev/null
+++ b/docs/api/tutorials/reading-tags.md
@@ -0,0 +1,128 @@
+# Reading Tags On Datasets/Columns
+
+## Why Would You Read Tags?
+
+Tags are informal, loosely controlled labels that help in search & discovery. They can be added to datasets, dataset schemas, or containers, for an easy way to label or categorize entities – without having to associate them to a broader business glossary or vocabulary.
+
+For more information about tags, refer to [About DataHub Tags](/docs/tags.md).
+
+### Goal Of This Guide
+
+This guide will show you how to read tags attached to a dataset `SampleHiveDataset`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before reading tags, you need to ensure the targeted dataset and the tag are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+If you want to know how to create tags using APIs & SDKs, please refer to [Creating Tags](/docs/api/tutorials/creating-tags.md) and [Adding Tags](/docs/api/tutorials/adding-tags.md).
+:::
+
+## Read Tags With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+query {
+ dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)") {
+ tags {
+ tags {
+ tag {
+ name
+ urn
+ properties {
+ description
+ colorHex
+ }
+ }
+ }
+ }
+ }
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "dataset": {
+ "tags": {
+ "tags": [
+ {
+ "tag": {
+ "name": "Legacy",
+ "urn": "urn:li:tag:Legacy",
+ "properties": {
+ "description": "Indicates the dataset is no longer supported",
+ "colorHex": null,
+ "name": "Legacy"
+ }
+ }
+ }
+ ]
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "{dataset(urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)\") {tags {tags {tag {name urn properties { description colorHex } } } } } }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{
+ "data": {
+ "dataset": {
+ "tags": {
+ "tags": [
+ {
+ "tag": {
+ "name": "Legacy",
+ "urn": "urn:li:tag:Legacy",
+ "properties": {
+ "description": "Indicates the dataset is no longer supported",
+ "colorHex": null
+ }
+ }
+ }
+ ]
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+## Read Tags With Python SDK
+
+The following code reads tags attached to a dataset `SampleHiveDataset`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
diff --git a/docs/api/tutorials/reading-terms.md b/docs/api/tutorials/reading-terms.md
new file mode 100644
index 0000000000000..be139342dbc12
--- /dev/null
+++ b/docs/api/tutorials/reading-terms.md
@@ -0,0 +1,107 @@
+# Reading Terms On Datasets/Columns
+
+## Why Would You Read Terms?
+
+The Business Glossary (Term) feature in DataHub helps you use a shared vocabulary within the organization, by providing a framework for defining a standardized set of data concepts and then associating them with the physical assets that exist within your data ecosystem.
+
+For more information about terms, refer to [About DataHub Business Glossary](/docs/glossary/business-glossary.md).
+
+### Goal Of This Guide
+
+This guide will show you how to read terms attached to a dataset `fct_users_created`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before adding terms, you need to ensure the targeted dataset and the term are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+
+Specifically, we will assume that the term `CustomerAccount` is attached to a dataset `fct_users_created`.
+To learn how to add terms to your own datasets, please refer to our documentation on [Adding Terms](/docs/api/tutorials/adding-terms.md).
+:::
+
+## Read Terms With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```json
+query {
+ dataset(urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)") {
+ glossaryTerms {
+ terms {
+ term {
+ urn
+ glossaryTermInfo {
+ name
+ description
+ }
+ }
+ }
+ }
+ }
+}
+```
+
+If you see the following response, the operation was successful:
+
+```python
+{
+ "data": {
+ "dataset": {
+ "glossaryTerms": {
+ "terms": [
+ {
+ "term": {
+ "urn": "urn:li:glossaryTerm:CustomerAccount",
+ "glossaryTermInfo": {
+ "name": "CustomerAccount",
+ "description": "account that represents an identified, named collection of balances and cumulative totals used to summarize customer transaction-related activity over a designated period of time"
+ }
+ }
+ }
+ ]
+ }
+ }
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "{dataset(urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\") {glossaryTerms {terms {term {urn glossaryTermInfo { name description } } } } } }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{
+  "data": {
+    "dataset": {
+      "glossaryTerms": {
+        "terms": [
+          {
+            "term": {
+              "urn": "urn:li:glossaryTerm:CustomerAccount",
+              "glossaryTermInfo": {
+                "name": "CustomerAccount",
+                "description": "account that represents an identified, named collection of balances and cumulative totals used to summarize customer transaction-related activity over a designated period of time"
+              }
+            }
+          }
+        ]
+      }
+    }
+  },
+  "extensions": {}
+}
+```
+
+## Read Terms With Python SDK
+
+The following code reads terms attached to a dataset `fct_users_created`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
diff --git a/docs/api/tutorials/removing-domains.md b/docs/api/tutorials/removing-domains.md
new file mode 100644
index 0000000000000..0502501b01228
--- /dev/null
+++ b/docs/api/tutorials/removing-domains.md
@@ -0,0 +1,82 @@
+# Removing Domains From Datasets
+
+## Why Would You Remove Domains?
+
+Domains are curated, top-level folders or categories where related assets can be explicitly grouped. Management of Domains can be centralized, or distributed out to Domain owners. Currently, an asset can belong to only one Domain at a time.
+For more information about domains, refer to [About DataHub Domains](/docs/domains.md).
+
+### Goal Of This Guide
+
+This guide will show you how to remove the domain `Marketing` from the `fct_users_created` dataset.
+
+## Pre-requisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed information, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before removing domains, you need to ensure the targeted dataset and the domain are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+
+Specifically, we will assume that the domain `Marketing` is attached to the dataset `fct_users_created`.
+To learn how to add datasets to a domain, please refer to our documentation on [Adding Domain](/docs/api/tutorials/adding-domain.md).
+:::
+
+## Remove Domains With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```graphql
+mutation unsetDomain {
+ unsetDomain(
+ entityUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"
+ )
+}
+```
+
+Expected Response:
+
+```json
+{
+ "data": {
+    "unsetDomain": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation unsetDomain { unsetDomain(entityUrn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\") }", "variables":{}}'
+```
+
+## Remove Domain With Python SDK
+
+The following code removes a domain named `Marketing` from a dataset named `fct_users_created`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see a domain `Marketing` has been removed from the `fct_users_created` dataset.
+
+![domain-removed](../../imgs/apis/tutorials/domain-removed.png)
diff --git a/docs/api/tutorials/removing-owners.md b/docs/api/tutorials/removing-owners.md
new file mode 100644
index 0000000000000..fd2e5f0d28872
--- /dev/null
+++ b/docs/api/tutorials/removing-owners.md
@@ -0,0 +1,103 @@
+# Removing Owners From Datasets
+
+## Why Would You Remove Owners?
+
+Assigning an owner to an entity helps to establish accountability for the metadata and collaborating as a team.
+If there are any issues or questions about the data, the designated owner can serve as a reliable point of contact.
+
+### Goal Of This Guide
+
+This guide will show you how to remove the owner `John Doe` from the `SampleHdfsDataset` dataset.
+
+## Pre-requisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed information, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before removing owners, you need to ensure the targeted dataset and the owner are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+:::
+
+## Remove Owners With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```graphql
+mutation removeOwners {
+ removeOwner(
+ input: {
+ ownerUrn: "urn:li:corpuser:jdoe",
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)",
+ }
+ )
+}
+```
+
+Note that you can also remove owners from multiple entities or subresource using `batchRemoveOwners`.
+
+```graphql
+mutation batchRemoveOwners {
+ batchRemoveOwners(
+ input: {
+ ownerUrns: ["urn:li:corpuser:jdoe"],
+ resources: [
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"} ,
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"} ,]
+ }
+ )
+}
+```
+
+Expected Response:
+
+```json
+{
+ "data": {
+ "removeOwner": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation removeOwner { removeOwner(input: { ownerUrn: \"urn:li:corpuser:jdoe\", resourceUrn: \"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)\" }) }", "variables":{}}'
+```
+
+## Remove Ownerships With Python SDK
+
+The following code removes an owner named `John Doe` from a dataset named `SampleHdfsDataset`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+Expected Response:
+
+```json
+{ "data": { "removeOwner": true }, "extensions": {} }
+```
+
+## Expected Outcomes
+
+You can now see `John Doe` has been removed as an owner from the `SampleHdfsDataset` dataset.
+
+![ownership-removed](../../imgs/apis/tutorials/owner-removed.png)
diff --git a/docs/api/tutorials/removing-tags.md b/docs/api/tutorials/removing-tags.md
new file mode 100644
index 0000000000000..4b1c2d33f7b82
--- /dev/null
+++ b/docs/api/tutorials/removing-tags.md
@@ -0,0 +1,119 @@
+# Removing Tags From Datasets/Columns
+
+## Why Would You Remove Tags?
+
+Tags are informal, loosely controlled labels that help in search & discovery. They can be added to datasets, dataset schemas, or containers, for an easy way to label or categorize entities – without having to associate them to a broader business glossary or vocabulary.
+
+For more information about tags, refer to [About DataHub Tags](/docs/tags.md).
+
+### Goal Of This Guide
+
+This guide will show you how to remove a `Legacy` tag from the `shipment_info` column of a dataset called `SampleHdfsDataset`.
+Additionally, we will cover how to remove a tag from the dataset or from multiple entities.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before removing tags, you need to ensure the targeted dataset and the tag are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+If you want to know how to create tags using APIs & SDKs, please refer to [Creating Tags](/docs/api/tutorials/creating-tags.md) and [Creating Datasets](/docs/api/tutorials/creating-datasets.md).
+:::
+
+## Remove Tags With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```graphql
+mutation removeTag {
+ removeTag(
+ input: {
+ tagUrn: "urn:li:tag:Legacy",
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)",
+ subResourceType:DATASET_FIELD,
+ subResource:"shipment_info"})
+}
+```
+
+Note that you can also remove a tag from a dataset if you don't specify `subResourceType` and `subResource`.
+
+```graphql
+mutation removeTag {
+ removeTag(
+ input: {
+ tagUrn: "urn:li:tag:Legacy",
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)",
+ }
+ )
+}
+```
+
+Note that you can also remove tags from multiple entities or subresource using `batchRemoveTags`.
+
+```graphql
+mutation batchRemoveTags {
+ batchRemoveTags(
+ input: {
+ tagUrns: ["urn:li:tag:Legacy"],
+ resources: [
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"} ,
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"} ,]
+ }
+ )
+}
+```
+
+If you see the following response, the operation was successful:
+
+```json
+{
+ "data": {
+ "removeTag": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation removeTag { removeTag(input: { tagUrn: \"urn:li:tag:Legacy\", resourceUrn: \"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "removeTag": true }, "extensions": {} }
+```
+
+## Remove Tags With Python SDK
+
+The following code removes a tag named `Legacy` from `shipment_info` column of a dataset called `SampleHdfsDataset`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see the `Legacy` tag has been removed from the `shipment_info` column.
+
+![tag-removed](../../imgs/apis/tutorials/tag-removed.png)
diff --git a/docs/api/tutorials/removing-terms.md b/docs/api/tutorials/removing-terms.md
new file mode 100644
index 0000000000000..bac66f77ae417
--- /dev/null
+++ b/docs/api/tutorials/removing-terms.md
@@ -0,0 +1,119 @@
+# Removing Terms From Datasets/Columns
+
+## Why Would You Remove Terms?
+
+The Business Glossary (Term) feature in DataHub helps you use a shared vocabulary within the organization, by providing a framework for defining a standardized set of data concepts and then associating them with the physical assets that exist within your data ecosystem.
+
+For more information about terms, refer to [About DataHub Business Glossary](/docs/glossary/business-glossary.md).
+
+### Goal Of This Guide
+
+This guide will show you how to remove a term `CustomerAccount` from the `user_name` column of a dataset called `fct_users_created`.
+Additionally, we will cover how to remove a term from the dataset or from multiple entities.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before removing terms, you need to ensure the targeted dataset and the term are already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+Specifically, we will assume that the term `CustomerAccount` is attached to the `user_name` column of a dataset `fct_users_created`.
+To learn how to add terms to your own datasets, please refer to our documentation on [Adding Terms](/docs/api/tutorials/adding-terms.md).
+:::
+
+## Remove Terms With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```graphql
+mutation removeTerm {
+ removeTerm(
+ input: {
+ termUrn: "urn:li:glossaryTerm:CustomerAccount",
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ subResourceType:DATASET_FIELD,
+ subResource:"user_name"})
+}
+```
+
+Note that you can also remove a term from a dataset if you don't specify `subResourceType` and `subResource`.
+
+```graphql
+mutation removeTerm {
+ removeTerm(
+ input: {
+ termUrn: "urn:li:glossaryTerm:CustomerAccount",
+ resourceUrn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)",
+ })
+}
+```
+
+Also note that you can remove terms from multiple entities or subresource using `batchRemoveTerms`.
+
+```graphql
+mutation batchRemoveTerms {
+ batchRemoveTerms(
+ input: {
+ termUrns: ["urn:li:glossaryTerm:CustomerAccount"],
+ resources: [
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"} ,
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"} ,]
+ }
+ )
+}
+```
+
+If you see the following response, the operation was successful:
+
+```json
+{
+ "data": {
+ "removeTerm": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation removeTerm { removeTerm(input: { termUrn: \"urn:li:glossaryTerm:CustomerAccount\", resourceUrn: \"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "removeTerm": true }, "extensions": {} }
+```
+
+## Remove Terms With Python SDK
+
+The following code removes a term named `CustomerAccount` from the `user_name` column of a dataset called `fct_users_created`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see the `CustomerAccount` term has been removed from the `user_name` column.
+
+![term-removed](../../imgs/apis/tutorials/term-removed.png)
diff --git a/docs/api/tutorials/update-deprecation.md b/docs/api/tutorials/update-deprecation.md
new file mode 100644
index 0000000000000..c0cf5b755a545
--- /dev/null
+++ b/docs/api/tutorials/update-deprecation.md
@@ -0,0 +1,98 @@
+# Update Deprecation
+
+## Why Would You Update Deprecation?
+
+Deprecation indicates the status of an entity. For datasets, keeping the deprecation status up-to-date is important to inform users and downstream systems of changes to the dataset's availability or reliability. By updating the status, you can prevent issues and ensure users have access to the most reliable data.
+
+### Goal Of This Guide
+
+This guide will show you how to update deprecation status of a dataset `fct_users_created`.
+
+## Prerequisites
+
+For this tutorial, you need to deploy DataHub Quickstart and ingest sample data.
+For detailed steps, please refer to [Datahub Quickstart Guide](/docs/quickstart.md).
+
+:::note
+Before updating deprecation, you need to ensure the targeted dataset is already present in your datahub.
+If you attempt to manipulate entities that do not exist, your operation will fail.
+In this guide, we will be using data from a sample ingestion.
+:::
+
+## Update Deprecation With GraphQL
+
+:::note
+Please note that there are two available endpoints (`:8000`, `:9002`) to access GraphQL.
+For more information about the differences between these endpoints, please refer to [DataHub Metadata Service](../../../metadata-service/README.md#graphql-api)
+:::
+
+### GraphQL Explorer
+
+GraphQL Explorer is the fastest way to experiment with GraphQL without any dependencies.
+Navigate to GraphQL Explorer (`http://localhost:9002/api/graphiql`) and run the following query.
+
+```graphql
+mutation updateDeprecation {
+ updateDeprecation(input: { urn: "urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)", deprecated: true })
+}
+```
+
+Also note that you can update deprecation status of multiple entities or subresource using `batchUpdateDeprecation`.
+
+```graphql
+mutation batchUpdateDeprecation {
+ batchUpdateDeprecation(
+ input: {
+ deprecated: true,
+ resources: [
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hdfs,SampleHdfsDataset,PROD)"} ,
+ { resourceUrn:"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)"} ,]
+ }
+ )
+}
+
+```
+
+If you see the following response, the operation was successful:
+
+```json
+{
+ "data": {
+ "updateDeprecation": true
+ },
+ "extensions": {}
+}
+```
+
+### CURL
+
+With CURL, you need to provide tokens. To generate a token, please refer to [Access Token Management](/docs/api/graphql/token-management.md).
+With `accessToken`, you can run the following command.
+
+```shell
+curl --location --request POST 'http://localhost:8080/api/graphql' \
+--header 'Authorization: Bearer ' \
+--header 'Content-Type: application/json' \
+--data-raw '{ "query": "mutation updateDeprecation { updateDeprecation(input: { deprecated: true, urn: \"urn:li:dataset:(urn:li:dataPlatform:hive,fct_users_created,PROD)\" }) }", "variables":{}}'
+```
+
+Expected Response:
+
+```json
+{ "data": { "updateDeprecation": true }, "extensions": {} }
+```
+
+## Update Deprecation With Python SDK
+
+The following code updates the deprecation status of a dataset `fct_users_created`.
+
+> Coming Soon!
+
+We're using the `MetadataChangeProposalWrapper` to change entities in this example.
+For more information about the `MetadataChangeProposal`, please refer to [MetadataChangeProposal & MetadataChangeLog Events](/docs/advanced/mcp-mcl.md)
+
+## Expected Outcomes
+
+You can now see the dataset `fct_users_created` has been marked as `Deprecated`.
+
+![deprecation-updated](../../imgs/apis/tutorials/deprecation-updated.png)
diff --git a/docs/developers.md b/docs/developers.md
index 401169490dd4b..e676d8abc36eb 100644
--- a/docs/developers.md
+++ b/docs/developers.md
@@ -121,12 +121,22 @@ Open `datahub.ipr` in IntelliJ to start developing!
For consistency please import and auto format the code using [LinkedIn IntelliJ Java style](../gradle/idea/LinkedIn%20Style.xml).
+<<<<<<< HEAD
+
+## Windows Compatibility
+
+For optimal performance and compatibility, we strongly recommend building on a Mac or Linux system.
+Please note that we do not actively support Windows in a non-virtualized environment.
+
+If you must use Windows, one workaround is to build within a virtualized environment, such as a VM (Virtual Machine).
+=======
## Windows Compatibility
For optimal performance and compatibility, we strongly recommend building on a Mac or Linux system.
Please note that we do not actively support Windows in a non-virtualized environment.
If you must use Windows, one workaround is to build within a virtualized environment, such as a VM(Virtual Machine) or [WSL(Windows Subsystem for Linux)](https://learn.microsoft.com/en-us/windows/wsl).
+>>>>>>> upstream/master
This approach can help ensure that your build environment remains isolated and stable, and that your code is compiled correctly.
## Common Build Issues
@@ -148,7 +158,11 @@ You can install multiple version of Java on a single machine and switch between
#### `:metadata-models:generateDataTemplate` task fails with `java.nio.file.InvalidPathException: Illegal char <:> at index XX` or `Caused by: java.lang.IllegalArgumentException: 'other' has different root` error
+<<<<<<< HEAD
+This is a [known issue](https://github.com/linkedin/rest.li/issues/287) when building the project on Windows due a bug in the Pegasus plugin. Please refer to [Windows Compatibility](/docs/developers.md#windows-compatibility).
+=======
This is a [known issue](https://github.com/linkedin/rest.li/issues/287) when building the project on Windows due a bug in the Pegasus plugin. Please refer to [Windows Compatibility](/docs/developers.md#windows-compatibility).
+>>>>>>> upstream/master
#### Various errors related to `generateDataTemplate` or other `generate` tasks
diff --git a/docs/glossary/business-glossary.md b/docs/glossary/business-glossary.md
index f77dc8d4f7a9e..ea2d2f46352f6 100644
--- a/docs/glossary/business-glossary.md
+++ b/docs/glossary/business-glossary.md
@@ -12,7 +12,7 @@ import FeatureAvailability from '@site/src/components/FeatureAvailability';
When working in complex data ecosystems, it is very useful to organize data assets using a shared vocabulary. The Business Glossary feature in DataHub helps you do this, by providing a framework for defining a standardized set of data concepts and then associating them with the physical assets that exist within your data ecosystem.
-Within this document, we'll introduce the core concepts comprising DataHub's Business Glossary feature and show you how to put it to work in your organization.
+Within this document, we'll introduce the core concepts comprising DataHub's Business Glossary feature and show you how to put it to work in your organization.
### Terms & Term Groups
@@ -120,6 +120,7 @@ Once you've defined your Glossary, you can begin attaching terms to data assets.
In the modal that pops up you can select the Term you care about in one of two ways:
+
- Search for the Term by name in the input
- Navigate through the Glossary dropdown that appears after clicking into the input
@@ -140,7 +141,7 @@ Glossary Terms and Term Groups abide by metadata policies like other entities. H
You can use this [Github Action](https://github.com/acryldata/business-glossary-sync-action) and bring your Business Glossary into your git repositories. This can be the starting point to manage glossary in git.
-## Managing Glossary with Git
+## Managing Glossary with Git
In many cases, it may be preferable to manage the Business Glossary in a version-control system like git. This can make
managing changes across teams easier, by funneling all changes through a change management and review process.
@@ -177,6 +178,15 @@ Check out [our demo site](https://demo.datahubproject.io/glossary) to see an exa
### GraphQL
+<<<<<<< HEAD
+- [addTerm](../../graphql/mutations.md#addterm)
+- [addTerms](../../graphql/mutations.md#addterms)
+- [batchAddTerms](../../graphql/mutations.md#batchaddterms)
+- [removeTerm](../../graphql/mutations.md#removeterm)
+- [batchRemoveTerms](../../graphql/mutations.md#batchremoveterms)
+- [createGlossaryTerm](../../graphql/mutations.md#createglossaryterm)
+- [createGlossaryNode](../../graphql/mutations.md#createglossarynode) (Term Group)
+=======
* [addTerm](../../graphql/mutations.md#addterm)
* [addTerms](../../graphql/mutations.md#addterms)
* [batchAddTerms](../../graphql/mutations.md#batchaddterms)
@@ -186,8 +196,10 @@ Check out [our demo site](https://demo.datahubproject.io/glossary) to see an exa
* [createGlossaryNode](../../graphql/mutations.md#createglossarynode) (Term Group)
You can easily fetch the Glossary Terms for an entity with a given its URN using the **glossaryTerms** property. Check out [Working with Metadata Entities](../api/graphql/how-to-set-up-graphql.md#querying-for-glossary-terms-of-an-asset) for an example.
+>>>>>>> upstream/master
## Resources
+
- [Creating a Business Glossary and Putting it to use in DataHub](https://blog.datahubproject.io/creating-a-business-glossary-and-putting-it-to-use-in-datahub-43a088323c12)
- [Tags and Terms: Two Powerful DataHub Features, Used in Two Different Scenarios](https://medium.com/datahub-project/tags-and-terms-two-powerful-datahub-features-used-in-two-different-scenarios-b5b4791e892e)
diff --git a/docs/imgs/apis/postman-graphql.png b/docs/imgs/apis/postman-graphql.png
new file mode 100644
index 0000000000000..1cffd226fdf77
Binary files /dev/null and b/docs/imgs/apis/postman-graphql.png differ
diff --git a/docs/imgs/apis/tutorials/deprecation-updated.png b/docs/imgs/apis/tutorials/deprecation-updated.png
new file mode 100644
index 0000000000000..06fedf746f694
Binary files /dev/null and b/docs/imgs/apis/tutorials/deprecation-updated.png differ
diff --git a/docs/imgs/apis/tutorials/domain-created.png b/docs/imgs/apis/tutorials/domain-created.png
new file mode 100644
index 0000000000000..cafab2a5e8d5c
Binary files /dev/null and b/docs/imgs/apis/tutorials/domain-created.png differ
diff --git a/docs/imgs/apis/tutorials/domain-removed.png b/docs/imgs/apis/tutorials/domain-removed.png
new file mode 100644
index 0000000000000..1b21172be11d2
Binary files /dev/null and b/docs/imgs/apis/tutorials/domain-removed.png differ
diff --git a/docs/imgs/apis/tutorials/group-added.png b/docs/imgs/apis/tutorials/group-added.png
new file mode 100644
index 0000000000000..1c2b0fe1af9ec
Binary files /dev/null and b/docs/imgs/apis/tutorials/group-added.png differ
diff --git a/docs/imgs/apis/tutorials/owner-removed.png b/docs/imgs/apis/tutorials/owner-removed.png
new file mode 100644
index 0000000000000..a7b6567888caf
Binary files /dev/null and b/docs/imgs/apis/tutorials/owner-removed.png differ
diff --git a/docs/imgs/apis/tutorials/tag-removed.png b/docs/imgs/apis/tutorials/tag-removed.png
new file mode 100644
index 0000000000000..31a267549843e
Binary files /dev/null and b/docs/imgs/apis/tutorials/tag-removed.png differ
diff --git a/docs/imgs/apis/tutorials/term-removed.png b/docs/imgs/apis/tutorials/term-removed.png
new file mode 100644
index 0000000000000..dbf9f35f09339
Binary files /dev/null and b/docs/imgs/apis/tutorials/term-removed.png differ
diff --git a/docs/imgs/apis/tutorials/user-added.png b/docs/imgs/apis/tutorials/user-added.png
new file mode 100644
index 0000000000000..d2695a6fa7ffc
Binary files /dev/null and b/docs/imgs/apis/tutorials/user-added.png differ
diff --git a/docs/tags.md b/docs/tags.md
index 880e57f8d0a4f..a9277a0c3652c 100644
--- a/docs/tags.md
+++ b/docs/tags.md
@@ -8,15 +8,15 @@ Tags are informal, loosely controlled labels that help in search & discovery. Th
Tags can help help you in:
-* Querying: Tagging a dataset with a phrase that a co-worker can use to query the same dataset
-* Mapping assets to a category or group of your choice
+- Querying: Tagging a dataset with a phrase that a co-worker can use to query the same dataset
+- Mapping assets to a category or group of your choice
## Tags Setup, Prerequisites, and Permissions
What you need to add tags:
-* **Edit Tags** metadata privilege to add tags at the entity level
-* **Edit Dataset Column Tags** to edit tags at the column level
+- **Edit Tags** metadata privilege to add tags at the entity level
+- **Edit Dataset Column Tags** to edit tags at the column level
You can create these privileges by creating a new [Metadata Policy](./authorization/policies.md).
@@ -72,6 +72,16 @@ You can search for a tag in the search bar, and even filter entities by the pres
### GraphQL
+<<<<<<< HEAD
+- [addTag](../graphql/mutations.md#addtag)
+- [addTags](../graphql/mutations.md#addtags)
+- [batchAddTags](../graphql/mutations.md#batchaddtags)
+- [removeTag](../graphql/mutations.md#removetag)
+- [batchRemoveTags](../graphql/mutations.md#batchremovetags)
+- [createTag](../graphql/mutations.md#createtag)
+- [updateTag](../graphql/mutations.md#updatetag)
+- [deleteTag](../graphql/mutations.md#deletetag)
+=======
* [addTag](../graphql/mutations.md#addtag)
* [addTags](../graphql/mutations.md#addtags)
* [batchAddTags](../graphql/mutations.md#batchaddtags)
@@ -82,11 +92,12 @@ You can search for a tag in the search bar, and even filter entities by the pres
* [deleteTag](../graphql/mutations.md#deletetag)
You can easily fetch the Tags for an entity with a given its URN using the **tags** property. Check out [Working with Metadata Entities](./api/graphql/how-to-set-up-graphql.md#querying-for-tags-of-an-asset) for an example.
+>>>>>>> upstream/master
### DataHub Blog
-* [Tags and Terms: Two Powerful DataHub Features, Used in Two Different Scenarios
-Managing PII in DataHub: A Practitioner’s Guide](https://blog.datahubproject.io/tags-and-terms-two-powerful-datahub-features-used-in-two-different-scenarios-b5b4791e892e)
+- [Tags and Terms: Two Powerful DataHub Features, Used in Two Different Scenarios
+ Managing PII in DataHub: A Practitioner’s Guide](https://blog.datahubproject.io/tags-and-terms-two-powerful-datahub-features-used-in-two-different-scenarios-b5b4791e892e)
## FAQ and Troubleshooting
@@ -96,16 +107,20 @@ DataHub Tags are informal, loosely controlled labels while Terms are part of a c
Usage and applications:
-* An asset may have multiple tags.
-* Tags serve as a tool for search & discovery while Terms are typically used to standardize types of leaf-level attributes (i.e. schema fields) for governance. E.g. (EMAIL_PLAINTEXT)
+- An asset may have multiple tags.
+- Tags serve as a tool for search & discovery while Terms are typically used to standardize types of leaf-level attributes (i.e. schema fields) for governance. E.g. (EMAIL_PLAINTEXT)
**How are DataHub Tags different from Domains?**
Domains are a set of top-level categories usually aligned to business units/disciplines to which the assets are most relevant. They rely on central or distributed management. A single domain is assigned per data asset.
+<<<<<<< HEAD
+_Need more help? Join the conversation in [Slack](http://slack.datahubproject.io)!_
+=======
+>>>>>>> upstream/master
### Related Features
-* [Glossary Terms](./glossary/business-glossary.md)
-* [Domains](./domains.md)
+- [Glossary Terms](./glossary/business-glossary.md)
+- [Domains](./domains.md)
diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceInput.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceInput.pdl
index d005cd557cf77..32329f60bfaa7 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceInput.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceInput.pdl
@@ -15,7 +15,8 @@ record DataProcessInstanceInput {
@Relationship = {
"/*": {
"name": "Consumes",
- "entityTypes": [ "dataset" ]
+ "entityTypes": [ "dataset" ],
+ "isLineage": true
}
}
@Searchable = {
diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl
index f33c41e63efed..223abfb7e6f4c 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl
@@ -15,7 +15,9 @@ record DataProcessInstanceOutput {
@Relationship = {
"/*": {
"name": "Produces",
- "entityTypes": [ "dataset" ]
+ "entityTypes": [ "dataset" ],
+ "isLineage": true,
+ "isUpstream": false
}
}
@Searchable = {
diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl
index c63cb1a97c017..59c482bd91e80 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl
@@ -1,6 +1,6 @@
namespace com.linkedin.dataprocess
-import com.linkedin.common.AuditStamp
+import com.linkedin.common.TimeStamp
import com.linkedin.common.CustomProperties
import com.linkedin.common.ExternalReference
import com.linkedin.common.Urn
@@ -50,6 +50,5 @@ record DataProcessInstanceProperties includes CustomProperties, ExternalReferenc
"fieldName": "created"
}
}
- created: AuditStamp
-
-}
+ created: TimeStamp
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
index b54e430038082..a84b9c9d3994b 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
@@ -4,6 +4,7 @@ import com.linkedin.common.Urn
import com.linkedin.common.Time
import com.linkedin.common.VersionTag
import com.linkedin.common.CustomProperties
+import com.linkedin.common.TimeStamp
/**
* Properties associated with an ML Model Group
@@ -25,10 +26,21 @@ record MLModelGroupProperties includes CustomProperties {
/**
* Date when the MLModelGroup was developed
*/
+ @deprecated
createdAt: optional Time
+ /**
+ * Time and Actor who created the MLModelGroup
+ */
+ created: optional TimeStamp
+
+ /**
+ * Date when the MLModelGroup was last modified
+ */
+ lastModified: optional TimeStamp
+
/**
* Version of the MLModelGroup
*/
version: optional VersionTag
-}
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
index 621a3e1747b50..e0cab82a0943e 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
@@ -6,6 +6,7 @@ import com.linkedin.common.Time
import com.linkedin.common.VersionTag
import com.linkedin.common.CustomProperties
import com.linkedin.common.ExternalReference
+import com.linkedin.common.TimeStamp
/**
* Properties associated with a ML Model
@@ -15,6 +16,18 @@ import com.linkedin.common.ExternalReference
}
record MLModelProperties includes CustomProperties, ExternalReference {
+ /**
+ * Display name of the MLModel
+ */
+ @Searchable = {
+ "fieldType": "WORD_GRAM",
+ "enableAutocomplete": true,
+ "boostScore": 10.0,
+    "queryByDefault": true
+ }
+ name: optional string
+
+
/**
* Documentation of the MLModel
*/
@@ -27,8 +40,19 @@ record MLModelProperties includes CustomProperties, ExternalReference {
/**
* Date when the MLModel was developed
*/
+ @deprecated
date: optional Time
+ /**
+   * Timestamp (and optional actor) recording when this MLModel was created
+ */
+ created: optional TimeStamp
+
+ /**
+ * Date when the MLModel was last modified
+ */
+ lastModified: optional TimeStamp
+
/**
* Version of the MLModel
*/
@@ -93,24 +117,24 @@ record MLModelProperties includes CustomProperties, ExternalReference {
deployments: optional array[Urn]
/**
- * List of jobs (if any) used to train the model
+ * List of jobs or process instances (if any) used to train the model
*/
@Relationship = {
"/*": {
"name": "TrainedBy",
- "entityTypes": [ "dataJob" ],
+ "entityTypes": [ "dataJob", "dataProcessInstance" ],
"isLineage": true
}
}
trainingJobs: optional array[Urn]
/**
- * List of jobs (if any) that use the model
+ * List of jobs or process instances (if any) that use the model
*/
@Relationship = {
"/*": {
"name": "UsedBy",
- "entityTypes": [ "dataJob" ],
+      "entityTypes": [ "dataJob", "dataProcessInstance" ],
"isLineage": true,
"isUpstream": false
}
@@ -129,4 +153,4 @@ record MLModelProperties includes CustomProperties, ExternalReference {
}
}
groups: optional array[Urn]
-}
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLTrainingRunProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLTrainingRunProperties.pdl
new file mode 100644
index 0000000000000..f8b8eeafe908b
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLTrainingRunProperties.pdl
@@ -0,0 +1,36 @@
+namespace com.linkedin.ml.metadata
+
+import com.linkedin.common.AuditStamp
+import com.linkedin.common.CustomProperties
+import com.linkedin.common.ExternalReference
+import com.linkedin.common.Urn
+import com.linkedin.common.JobFlowUrn
+import com.linkedin.common.DataJobUrn
+/**
+ * Properties of an ML training run: run id, output URLs, hyperparameters, and metrics
+ */
+@Aspect = {
+ "name": "mlTrainingRunProperties",
+}
+record MLTrainingRunProperties includes CustomProperties, ExternalReference {
+
+ /**
+ * Run Id of the ML Training Run
+ */
+ id: optional string
+
+ /**
+ * List of URLs for the Outputs of the ML Training Run
+ */
+ outputUrls: optional array[string]
+
+ /**
+ * Hyperparameters of the ML Training Run
+ */
+ hyperParams: optional array[MLHyperParam]
+
+ /**
+ * Metrics of the ML Training Run
+ */
+ trainingMetrics: optional array[MLMetric]
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index 1c3eb5b574e20..4fe170ced69f3 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -116,6 +116,10 @@ entities:
- dataProcessInstanceRunEvent
- status
- testResults
+ - dataPlatformInstance
+ - subTypes
+ - container
+ - mlTrainingRunProperties
- name: chart
category: core
keyAspect: chartKey