From 047644b888b121fa3feb10a5f33bdef60b1072ce Mon Sep 17 00:00:00 2001
From: Hyejin Yoon <0327jane@gmail.com>
Date: Tue, 24 Dec 2024 10:06:35 +0900
Subject: [PATCH 1/2] feat: update mlflow-related metadata models (#12174)

Co-authored-by: John Joyce <john@Johns-MacBook-Pro.local>
Co-authored-by: John Joyce <john@acryl.io>
---
 .../src/main/resources/entity.graphql         | 196 +++++++++++++++++-
 .../dataprocess/DataProcessInstanceOutput.pdl |   2 +-
 .../DataProcessInstanceProperties.pdl         |   2 +-
 .../ml/metadata/MLModelGroupProperties.pdl    |  35 ++++
 .../ml/metadata/MLModelProperties.pdl         |  28 ++-
 .../ml/metadata/MLTrainingRunProperties.pdl   |  36 ++++
 .../src/main/resources/entity-registry.yml    |   4 +
 .../com.linkedin.entity.aspects.snapshot.json |  54 +++--
 ...com.linkedin.entity.entities.snapshot.json |  99 +++++++--
 .../com.linkedin.entity.runs.snapshot.json    |  54 +++--
 ...nkedin.operations.operations.snapshot.json |  54 +++--
 ...m.linkedin.platform.platform.snapshot.json |  99 +++++++--
 12 files changed, 568 insertions(+), 95 deletions(-)
 create mode 100644 metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLTrainingRunProperties.pdl

diff --git a/datahub-graphql-core/src/main/resources/entity.graphql b/datahub-graphql-core/src/main/resources/entity.graphql
index e086273068ee53..9abf4e16f12dd7 100644
--- a/datahub-graphql-core/src/main/resources/entity.graphql
+++ b/datahub-graphql-core/src/main/resources/entity.graphql
@@ -262,8 +262,16 @@ type Query {
     Fetch all Business Attributes
     """
     listBusinessAttributes(input: ListBusinessAttributesInput!): ListBusinessAttributesResult
+
+    """
+    Fetch a Data Process Instance by primary key (urn)
+    """
+    dataProcessInstance(urn: String!): DataProcessInstance
+
+
 }
 
+
 """
 An ERModelRelationship is a high-level abstraction that dictates what datasets fields are erModelRelationshiped.
 """
@@ -9832,15 +9840,45 @@ type MLModelGroup implements EntityWithRelationships & Entity & BrowsableEntity
     privileges: EntityPrivileges
 }
 
+"""
+Properties describing a group of related ML models
+"""
 type MLModelGroupProperties {
+    """
+    Display name of the model group
+    """
+    name: String
 
+    """
+    Detailed description of the model group's purpose and contents
+    """
     description: String
 
-    createdAt: Long
+    """
+    When this model group was created
+    """
+    created: AuditStamp
 
+    """
+    When this model group was last modified
+    """
+    lastModified: AuditStamp
+
+    """
+    Version identifier for this model group
+    """
     version: VersionTag
 
+    """
+    Custom key-value properties for the model group
+    """
     customProperties: [CustomPropertiesEntry!]
+
+    """
+    Deprecated creation timestamp
+    @deprecated Use the 'created' field instead
+    """
+    createdAt: Long @deprecated(reason: "Use `created` instead")
 }
 
 """
@@ -9990,40 +10028,103 @@ description: String
 }
 
 type MLMetric {
+    """
+    Name of the metric (e.g. accuracy, precision, recall)
+    """
     name: String
 
+    """
+    Description of what this metric measures
+    """
     description: String
 
+    """
+    The computed value of the metric
+    """
     value: String
 
+    """
+    Timestamp when this metric was recorded
+    """
     createdAt: Long
 }
 
 type MLModelProperties {
+    """
+    The display name of the model used in the UI (null when no name has been set)
+    """
+    name: String
 
+    """
+    Detailed description of the model's purpose and characteristics
+    """
     description: String
 
-    date: Long
+    """
+    When the model was last modified
+    """
+    lastModified: AuditStamp
 
+    """
+    Version identifier for this model
+    """
     version: String
 
+    """
+    The type/category of ML model (e.g. classification, regression)
+    """
     type: String
 
+    """
+    Mapping of hyperparameter configurations
+    """
     hyperParameters: HyperParameterMap
 
-    hyperParams:  [MLHyperParam]
+    """
+    List of hyperparameter settings used to train this model
+    """
+    hyperParams: [MLHyperParam]
 
+    """
+    Performance metrics from model training
+    """
     trainingMetrics: [MLMetric]
 
+    """
+    Names of ML features used by this model
+    """
     mlFeatures: [String!]
 
+    """
+    Tags for categorizing and searching models
+    """
     tags: [String!]
 
+    """
+    Model groups this model belongs to
+    """
     groups: [MLModelGroup]
 
+    """
+    Additional custom properties specific to this model
+    """
     customProperties: [CustomPropertiesEntry!]
 
+    """
+    URL to view this model in external system
+    """
     externalUrl: String
+
+    """
+    When this model was created
+    """
+    created: AuditStamp
+
+    """
+    Deprecated timestamp for model creation
+    @deprecated Use 'created' field instead
+    """
+    date: Long @deprecated(reason: "Use `created` instead")
 }
 
 type MLFeatureProperties {
@@ -12804,3 +12905,92 @@ type CronSchedule {
     """
     timezone: String!
 }
+
+
+"""
+Properties describing a data process instance's execution metadata
+"""
+type DataProcessInstanceProperties {
+    """
+    The display name of this process instance
+    """
+    name: String!
+
+    """
+    URL to view this process instance in the external system
+    """
+    externalUrl: String
+
+    """
+    When this process instance was created
+    """
+    created: AuditStamp
+
+    """
+    Additional custom properties specific to this process instance
+    """
+    customProperties: [CustomPropertiesEntry!]
+}
+
+"""
+Properties specific to an ML model training run instance
+"""
+type MLTrainingRunProperties {
+    """
+    Unique identifier for this training run
+    """
+    id: String
+
+    """
+    List of URLs to access training run outputs (e.g. model artifacts, logs)
+    """
+    outputUrls: [String]
+    
+    """
+    Hyperparameters used in this training run
+    """
+    hyperParams: [MLHyperParam]
+
+    """
+    Performance metrics recorded during this training run
+    """
+    trainingMetrics: [MLMetric]
+}
+
+extend type DataProcessInstance {
+
+    """
+    Additional read only properties associated with the Data Process Instance
+    """
+    properties: DataProcessInstanceProperties
+
+    """
+    The specific instance of the data platform that this entity belongs to
+    """
+    dataPlatformInstance: DataPlatformInstance
+
+    """
+    Sub Types that this entity implements
+    """
+    subTypes: SubTypes
+
+    """
+    The parent container in which the entity resides
+    """
+    container: Container
+
+    """
+    Standardized platform urn where the data process instance is defined
+    """
+    platform: DataPlatform!
+
+    """
+    Recursively get the lineage of containers for this entity
+    """
+    parentContainers: ParentContainersResult
+
+    """
+    Additional properties when subtype is Training Run
+    """
+    mlTrainingRunProperties: MLTrainingRunProperties
+}
diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl
index f33c41e63efed6..fe782dbe01ca9b 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceOutput.pdl
@@ -15,7 +15,7 @@ record DataProcessInstanceOutput {
   @Relationship = {
     "/*": {
       "name": "Produces",
-      "entityTypes": [ "dataset" ]
+      "entityTypes": [ "dataset", "mlModel" ]
     }
   }
   @Searchable = {
diff --git a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl
index c63cb1a97c017d..5c6bfaecf1ef4d 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/dataprocess/DataProcessInstanceProperties.pdl
@@ -52,4 +52,4 @@ record DataProcessInstanceProperties includes CustomProperties, ExternalReferenc
   }
   created: AuditStamp
 
-}
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
index b54e430038082d..81c5e7a240f618 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelGroupProperties.pdl
@@ -4,6 +4,7 @@ import com.linkedin.common.Urn
 import com.linkedin.common.Time
 import com.linkedin.common.VersionTag
 import com.linkedin.common.CustomProperties
+import com.linkedin.common.TimeStamp
 
 /**
  * Properties associated with an ML Model Group
@@ -13,6 +14,17 @@ import com.linkedin.common.CustomProperties
 }
 record MLModelGroupProperties includes CustomProperties {
 
+  /**
+   * Display name of the MLModelGroup
+   */
+  @Searchable = {
+    "fieldType": "WORD_GRAM",
+    "enableAutocomplete": true,
+    "boostScore": 10.0,
+    "queryByDefault": true,
+  }
+  name: optional string
+
   /**
    * Documentation of the MLModelGroup
    */
@@ -25,8 +37,31 @@ record MLModelGroupProperties includes CustomProperties {
   /**
    * Date when the MLModelGroup was developed
    */
+  @deprecated
   createdAt: optional Time
 
+  /**
+   * Time and Actor who created the MLModelGroup
+   */
+  created: optional TimeStamp
+
+  /**
+   * Date when the MLModelGroup was last modified
+   */
+  lastModified: optional TimeStamp
+
+  /**
+   * List of jobs (if any) used to train the model group. Visible in Lineage.
+   */
+  @Relationship = {
+    "/*": {
+      "name": "TrainedBy",
+      "entityTypes": [ "dataJob" ],
+      "isLineage": true
+    }
+  }
+  trainingJobs: optional array[Urn]
+
   /**
    * Version of the MLModelGroup
    */
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
index 621a3e1747b504..d89d07384bba1d 100644
--- a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLModelProperties.pdl
@@ -6,6 +6,7 @@ import com.linkedin.common.Time
 import com.linkedin.common.VersionTag
 import com.linkedin.common.CustomProperties
 import com.linkedin.common.ExternalReference
+import com.linkedin.common.TimeStamp
 
 /**
  * Properties associated with a ML Model
@@ -15,6 +16,18 @@ import com.linkedin.common.ExternalReference
 }
 record MLModelProperties includes CustomProperties, ExternalReference {
 
+  /**
+   * Display name of the MLModel
+   */
+  @Searchable = {
+    "fieldType": "WORD_GRAM",
+    "enableAutocomplete": true,
+    "boostScore": 10.0,
+    "queryByDefault": true,
+  }
+  name: optional string
+
+
   /**
    * Documentation of the MLModel
    */
@@ -27,8 +40,19 @@ record MLModelProperties includes CustomProperties, ExternalReference {
   /**
    * Date when the MLModel was developed
    */
+  @deprecated
   date: optional Time
 
+  /**
+   * Audit stamp containing who created this and when
+   */
+  created: optional TimeStamp
+
+  /**
+   * Date when the MLModel was last modified
+   */
+  lastModified: optional TimeStamp
+
   /**
    * Version of the MLModel
    */
@@ -93,12 +117,12 @@ record MLModelProperties includes CustomProperties, ExternalReference {
   deployments: optional array[Urn]
 
   /**
-   * List of jobs (if any) used to train the model
+   * List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.
    */
   @Relationship = {
     "/*": {
       "name": "TrainedBy",
-      "entityTypes": [ "dataJob" ],
+      "entityTypes": [ "dataJob", "dataProcessInstance" ],
       "isLineage": true
     }
   }
diff --git a/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLTrainingRunProperties.pdl b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLTrainingRunProperties.pdl
new file mode 100644
index 00000000000000..f8b8eeafe908b7
--- /dev/null
+++ b/metadata-models/src/main/pegasus/com/linkedin/ml/metadata/MLTrainingRunProperties.pdl
@@ -0,0 +1,36 @@
+namespace com.linkedin.ml.metadata
+
+import com.linkedin.common.AuditStamp
+import com.linkedin.common.CustomProperties
+import com.linkedin.common.ExternalReference
+import com.linkedin.common.Urn
+import com.linkedin.common.JobFlowUrn
+import com.linkedin.common.DataJobUrn
+/**
+ * Properties specific to an ML training run: its run id, output URLs, hyperparameters and training metrics
+ */
+@Aspect = {
+  "name": "mlTrainingRunProperties",
+}
+record MLTrainingRunProperties includes CustomProperties, ExternalReference {
+
+  /**
+   * Run Id of the ML Training Run
+   */
+  id: optional string
+
+  /**
+   * List of URLs for the Outputs of the ML Training Run
+   */
+  outputUrls: optional array[string]
+
+  /**
+   * Hyperparameters of the ML Training Run
+   */
+  hyperParams: optional array[MLHyperParam]
+
+  /**
+   * Metrics of the ML Training Run
+   */
+  trainingMetrics: optional array[MLMetric]
+}
\ No newline at end of file
diff --git a/metadata-models/src/main/resources/entity-registry.yml b/metadata-models/src/main/resources/entity-registry.yml
index 1c3eb5b574e204..4fe170ced69f33 100644
--- a/metadata-models/src/main/resources/entity-registry.yml
+++ b/metadata-models/src/main/resources/entity-registry.yml
@@ -116,6 +116,10 @@ entities:
       - dataProcessInstanceRunEvent
       - status
       - testResults
+      - dataPlatformInstance
+      - subTypes
+      - container
+      - mlTrainingRunProperties
   - name: chart
     category: core
     keyAspect: chartKey
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
index 827789130d8bbb..1c713fd33884b5 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.aspects.snapshot.json
@@ -3826,12 +3826,23 @@
       "type" : "record",
       "name" : "MLModelProperties",
       "namespace" : "com.linkedin.ml.metadata",
-      "doc" : "Properties associated with a ML Model",
+      "doc" : "Properties associated with a ML Model\r",
       "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ],
       "fields" : [ {
+        "name" : "name",
+        "type" : "string",
+        "doc" : "Display name of the MLModel\r",
+        "optional" : true,
+        "Searchable" : {
+          "boostScore" : 10.0,
+          "enableAutocomplete" : true,
+          "fieldType" : "WORD_GRAM",
+          "queryByDefault" : true
+        }
+      }, {
         "name" : "description",
         "type" : "string",
-        "doc" : "Documentation of the MLModel",
+        "doc" : "Documentation of the MLModel\r",
         "optional" : true,
         "Searchable" : {
           "fieldType" : "TEXT",
@@ -3840,17 +3851,28 @@
       }, {
         "name" : "date",
         "type" : "com.linkedin.common.Time",
-        "doc" : "Date when the MLModel was developed",
+        "doc" : "Date when the MLModel was developed\r",
+        "optional" : true,
+        "deprecated" : true
+      }, {
+        "name" : "created",
+        "type" : "com.linkedin.common.TimeStamp",
+        "doc" : "Audit stamp containing who created this and when\r",
+        "optional" : true
+      }, {
+        "name" : "lastModified",
+        "type" : "com.linkedin.common.TimeStamp",
+        "doc" : "Date when the MLModel was last modified\r",
         "optional" : true
       }, {
         "name" : "version",
         "type" : "com.linkedin.common.VersionTag",
-        "doc" : "Version of the MLModel",
+        "doc" : "Version of the MLModel\r",
         "optional" : true
       }, {
         "name" : "type",
         "type" : "string",
-        "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc",
+        "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r",
         "optional" : true,
         "Searchable" : {
           "fieldType" : "TEXT_PARTIAL"
@@ -3866,7 +3888,7 @@
             "ref" : [ "string", "int", "float", "double", "boolean" ]
           }
         },
-        "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams",
+        "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r",
         "optional" : true
       }, {
         "name" : "hyperParams",
@@ -3901,7 +3923,7 @@
             }
           }
         },
-        "doc" : "Hyperparameters of the MLModel",
+        "doc" : "Hyperparameters of the MLModel\r",
         "optional" : true
       }, {
         "name" : "trainingMetrics",
@@ -3936,7 +3958,7 @@
             }
           }
         },
-        "doc" : "Metrics of the MLModel used in training",
+        "doc" : "Metrics of the MLModel used in training\r",
         "optional" : true
       }, {
         "name" : "onlineMetrics",
@@ -3944,7 +3966,7 @@
           "type" : "array",
           "items" : "MLMetric"
         },
-        "doc" : "Metrics of the MLModel used in production",
+        "doc" : "Metrics of the MLModel used in production\r",
         "optional" : true
       }, {
         "name" : "mlFeatures",
@@ -3952,7 +3974,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.MLFeatureUrn"
         },
-        "doc" : "List of features used for MLModel training",
+        "doc" : "List of features used for MLModel training\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3967,7 +3989,7 @@
           "type" : "array",
           "items" : "string"
         },
-        "doc" : "Tags for the MLModel",
+        "doc" : "Tags for the MLModel\r",
         "default" : [ ]
       }, {
         "name" : "deployments",
@@ -3975,7 +3997,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "Deployments for the MLModel",
+        "doc" : "Deployments for the MLModel\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3989,11 +4011,11 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "List of jobs (if any) used to train the model",
+        "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
-            "entityTypes" : [ "dataJob" ],
+            "entityTypes" : [ "dataJob", "dataProcessInstance" ],
             "isLineage" : true,
             "name" : "TrainedBy"
           }
@@ -4004,7 +4026,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "List of jobs (if any) that use the model",
+        "doc" : "List of jobs (if any) that use the model\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -4020,7 +4042,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "Groups the model belongs to",
+        "doc" : "Groups the model belongs to\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
index b549cef0af84b2..77d4644f3c121a 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.entities.snapshot.json
@@ -3984,12 +3984,23 @@
                   "type" : "record",
                   "name" : "MLModelProperties",
                   "namespace" : "com.linkedin.ml.metadata",
-                  "doc" : "Properties associated with a ML Model",
+                  "doc" : "Properties associated with a ML Model\r",
                   "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ],
                   "fields" : [ {
+                    "name" : "name",
+                    "type" : "string",
+                    "doc" : "Display name of the MLModel\r",
+                    "optional" : true,
+                    "Searchable" : {
+                      "boostScore" : 10.0,
+                      "enableAutocomplete" : true,
+                      "fieldType" : "WORD_GRAM",
+                      "queryByDefault" : true
+                    }
+                  }, {
                     "name" : "description",
                     "type" : "string",
-                    "doc" : "Documentation of the MLModel",
+                    "doc" : "Documentation of the MLModel\r",
                     "optional" : true,
                     "Searchable" : {
                       "fieldType" : "TEXT",
@@ -3998,17 +4009,28 @@
                   }, {
                     "name" : "date",
                     "type" : "com.linkedin.common.Time",
-                    "doc" : "Date when the MLModel was developed",
+                    "doc" : "Date when the MLModel was developed\r",
+                    "optional" : true,
+                    "deprecated" : true
+                  }, {
+                    "name" : "created",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Audit stamp containing who created this and when\r",
+                    "optional" : true
+                  }, {
+                    "name" : "lastModified",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Date when the MLModel was last modified\r",
                     "optional" : true
                   }, {
                     "name" : "version",
                     "type" : "com.linkedin.common.VersionTag",
-                    "doc" : "Version of the MLModel",
+                    "doc" : "Version of the MLModel\r",
                     "optional" : true
                   }, {
                     "name" : "type",
                     "type" : "string",
-                    "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc",
+                    "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r",
                     "optional" : true,
                     "Searchable" : {
                       "fieldType" : "TEXT_PARTIAL"
@@ -4024,7 +4046,7 @@
                         "ref" : [ "string", "int", "float", "double", "boolean" ]
                       }
                     },
-                    "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams",
+                    "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r",
                     "optional" : true
                   }, {
                     "name" : "hyperParams",
@@ -4059,7 +4081,7 @@
                         }
                       }
                     },
-                    "doc" : "Hyperparameters of the MLModel",
+                    "doc" : "Hyperparameters of the MLModel\r",
                     "optional" : true
                   }, {
                     "name" : "trainingMetrics",
@@ -4094,7 +4116,7 @@
                         }
                       }
                     },
-                    "doc" : "Metrics of the MLModel used in training",
+                    "doc" : "Metrics of the MLModel used in training\r",
                     "optional" : true
                   }, {
                     "name" : "onlineMetrics",
@@ -4102,7 +4124,7 @@
                       "type" : "array",
                       "items" : "MLMetric"
                     },
-                    "doc" : "Metrics of the MLModel used in production",
+                    "doc" : "Metrics of the MLModel used in production\r",
                     "optional" : true
                   }, {
                     "name" : "mlFeatures",
@@ -4110,7 +4132,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.MLFeatureUrn"
                     },
-                    "doc" : "List of features used for MLModel training",
+                    "doc" : "List of features used for MLModel training\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4125,7 +4147,7 @@
                       "type" : "array",
                       "items" : "string"
                     },
-                    "doc" : "Tags for the MLModel",
+                    "doc" : "Tags for the MLModel\r",
                     "default" : [ ]
                   }, {
                     "name" : "deployments",
@@ -4133,7 +4155,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "Deployments for the MLModel",
+                    "doc" : "Deployments for the MLModel\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4147,11 +4169,11 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "List of jobs (if any) used to train the model",
+                    "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
-                        "entityTypes" : [ "dataJob" ],
+                        "entityTypes" : [ "dataJob", "dataProcessInstance" ],
                         "isLineage" : true,
                         "name" : "TrainedBy"
                       }
@@ -4162,7 +4184,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "List of jobs (if any) that use the model",
+                    "doc" : "List of jobs (if any) that use the model\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4178,7 +4200,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "Groups the model belongs to",
+                    "doc" : "Groups the model belongs to\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4981,12 +5003,23 @@
                   "type" : "record",
                   "name" : "MLModelGroupProperties",
                   "namespace" : "com.linkedin.ml.metadata",
-                  "doc" : "Properties associated with an ML Model Group",
+                  "doc" : "Properties associated with an ML Model Group\r",
                   "include" : [ "com.linkedin.common.CustomProperties" ],
                   "fields" : [ {
+                    "name" : "name",
+                    "type" : "string",
+                    "doc" : "Display name of the MLModelGroup\r",
+                    "optional" : true,
+                    "Searchable" : {
+                      "boostScore" : 10.0,
+                      "enableAutocomplete" : true,
+                      "fieldType" : "WORD_GRAM",
+                      "queryByDefault" : true
+                    }
+                  }, {
                     "name" : "description",
                     "type" : "string",
-                    "doc" : "Documentation of the MLModelGroup",
+                    "doc" : "Documentation of the MLModelGroup\r",
                     "optional" : true,
                     "Searchable" : {
                       "fieldType" : "TEXT",
@@ -4995,12 +5028,38 @@
                   }, {
                     "name" : "createdAt",
                     "type" : "com.linkedin.common.Time",
-                    "doc" : "Date when the MLModelGroup was developed",
+                    "doc" : "Date when the MLModelGroup was developed\r",
+                    "optional" : true,
+                    "deprecated" : true
+                  }, {
+                    "name" : "created",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Time and Actor who created the MLModelGroup\r",
+                    "optional" : true
+                  }, {
+                    "name" : "lastModified",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Date when the MLModelGroup was last modified\r",
                     "optional" : true
+                  }, {
+                    "name" : "trainingJobs",
+                    "type" : {
+                      "type" : "array",
+                      "items" : "com.linkedin.common.Urn"
+                    },
+                    "doc" : "List of jobs (if any) used to train the model group. Visible in Lineage.\r",
+                    "optional" : true,
+                    "Relationship" : {
+                      "/*" : {
+                        "entityTypes" : [ "dataJob" ],
+                        "isLineage" : true,
+                        "name" : "TrainedBy"
+                      }
+                    }
                   }, {
                     "name" : "version",
                     "type" : "com.linkedin.common.VersionTag",
-                    "doc" : "Version of the MLModelGroup",
+                    "doc" : "Version of the MLModelGroup\r",
                     "optional" : true
                   } ],
                   "Aspect" : {
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
index c8be9d063eaea9..8b6def75f7a665 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.entity.runs.snapshot.json
@@ -3550,12 +3550,23 @@
       "type" : "record",
       "name" : "MLModelProperties",
       "namespace" : "com.linkedin.ml.metadata",
-      "doc" : "Properties associated with a ML Model",
+      "doc" : "Properties associated with a ML Model\r",
       "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ],
       "fields" : [ {
+        "name" : "name",
+        "type" : "string",
+        "doc" : "Display name of the MLModel\r",
+        "optional" : true,
+        "Searchable" : {
+          "boostScore" : 10.0,
+          "enableAutocomplete" : true,
+          "fieldType" : "WORD_GRAM",
+          "queryByDefault" : true
+        }
+      }, {
         "name" : "description",
         "type" : "string",
-        "doc" : "Documentation of the MLModel",
+        "doc" : "Documentation of the MLModel\r",
         "optional" : true,
         "Searchable" : {
           "fieldType" : "TEXT",
@@ -3564,17 +3575,28 @@
       }, {
         "name" : "date",
         "type" : "com.linkedin.common.Time",
-        "doc" : "Date when the MLModel was developed",
+        "doc" : "Date when the MLModel was developed\r",
+        "optional" : true,
+        "deprecated" : true
+      }, {
+        "name" : "created",
+        "type" : "com.linkedin.common.TimeStamp",
+        "doc" : "Audit stamp containing who created this and when\r",
+        "optional" : true
+      }, {
+        "name" : "lastModified",
+        "type" : "com.linkedin.common.TimeStamp",
+        "doc" : "Date when the MLModel was last modified\r",
         "optional" : true
       }, {
         "name" : "version",
         "type" : "com.linkedin.common.VersionTag",
-        "doc" : "Version of the MLModel",
+        "doc" : "Version of the MLModel\r",
         "optional" : true
       }, {
         "name" : "type",
         "type" : "string",
-        "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc",
+        "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r",
         "optional" : true,
         "Searchable" : {
           "fieldType" : "TEXT_PARTIAL"
@@ -3590,7 +3612,7 @@
             "ref" : [ "string", "int", "float", "double", "boolean" ]
           }
         },
-        "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams",
+        "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r",
         "optional" : true
       }, {
         "name" : "hyperParams",
@@ -3625,7 +3647,7 @@
             }
           }
         },
-        "doc" : "Hyperparameters of the MLModel",
+        "doc" : "Hyperparameters of the MLModel\r",
         "optional" : true
       }, {
         "name" : "trainingMetrics",
@@ -3660,7 +3682,7 @@
             }
           }
         },
-        "doc" : "Metrics of the MLModel used in training",
+        "doc" : "Metrics of the MLModel used in training\r",
         "optional" : true
       }, {
         "name" : "onlineMetrics",
@@ -3668,7 +3690,7 @@
           "type" : "array",
           "items" : "MLMetric"
         },
-        "doc" : "Metrics of the MLModel used in production",
+        "doc" : "Metrics of the MLModel used in production\r",
         "optional" : true
       }, {
         "name" : "mlFeatures",
@@ -3676,7 +3698,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.MLFeatureUrn"
         },
-        "doc" : "List of features used for MLModel training",
+        "doc" : "List of features used for MLModel training\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3691,7 +3713,7 @@
           "type" : "array",
           "items" : "string"
         },
-        "doc" : "Tags for the MLModel",
+        "doc" : "Tags for the MLModel\r",
         "default" : [ ]
       }, {
         "name" : "deployments",
@@ -3699,7 +3721,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "Deployments for the MLModel",
+        "doc" : "Deployments for the MLModel\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3713,11 +3735,11 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "List of jobs (if any) used to train the model",
+        "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
-            "entityTypes" : [ "dataJob" ],
+            "entityTypes" : [ "dataJob", "dataProcessInstance" ],
             "isLineage" : true,
             "name" : "TrainedBy"
           }
@@ -3728,7 +3750,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "List of jobs (if any) that use the model",
+        "doc" : "List of jobs (if any) that use the model\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3744,7 +3766,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "Groups the model belongs to",
+        "doc" : "Groups the model belongs to\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
index 8c7595c5e505d8..e4cc5c42303ee2 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.operations.operations.snapshot.json
@@ -3544,12 +3544,23 @@
       "type" : "record",
       "name" : "MLModelProperties",
       "namespace" : "com.linkedin.ml.metadata",
-      "doc" : "Properties associated with a ML Model",
+      "doc" : "Properties associated with a ML Model\r",
       "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ],
       "fields" : [ {
+        "name" : "name",
+        "type" : "string",
+        "doc" : "Display name of the MLModel\r",
+        "optional" : true,
+        "Searchable" : {
+          "boostScore" : 10.0,
+          "enableAutocomplete" : true,
+          "fieldType" : "WORD_GRAM",
+          "queryByDefault" : true
+        }
+      }, {
         "name" : "description",
         "type" : "string",
-        "doc" : "Documentation of the MLModel",
+        "doc" : "Documentation of the MLModel\r",
         "optional" : true,
         "Searchable" : {
           "fieldType" : "TEXT",
@@ -3558,17 +3569,28 @@
       }, {
         "name" : "date",
         "type" : "com.linkedin.common.Time",
-        "doc" : "Date when the MLModel was developed",
+        "doc" : "Date when the MLModel was developed\r",
+        "optional" : true,
+        "deprecated" : true
+      }, {
+        "name" : "created",
+        "type" : "com.linkedin.common.TimeStamp",
+        "doc" : "Audit stamp containing who created this and when\r",
+        "optional" : true
+      }, {
+        "name" : "lastModified",
+        "type" : "com.linkedin.common.TimeStamp",
+        "doc" : "Date when the MLModel was last modified\r",
         "optional" : true
       }, {
         "name" : "version",
         "type" : "com.linkedin.common.VersionTag",
-        "doc" : "Version of the MLModel",
+        "doc" : "Version of the MLModel\r",
         "optional" : true
       }, {
         "name" : "type",
         "type" : "string",
-        "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc",
+        "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r",
         "optional" : true,
         "Searchable" : {
           "fieldType" : "TEXT_PARTIAL"
@@ -3584,7 +3606,7 @@
             "ref" : [ "string", "int", "float", "double", "boolean" ]
           }
         },
-        "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams",
+        "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r",
         "optional" : true
       }, {
         "name" : "hyperParams",
@@ -3619,7 +3641,7 @@
             }
           }
         },
-        "doc" : "Hyperparameters of the MLModel",
+        "doc" : "Hyperparameters of the MLModel\r",
         "optional" : true
       }, {
         "name" : "trainingMetrics",
@@ -3654,7 +3676,7 @@
             }
           }
         },
-        "doc" : "Metrics of the MLModel used in training",
+        "doc" : "Metrics of the MLModel used in training\r",
         "optional" : true
       }, {
         "name" : "onlineMetrics",
@@ -3662,7 +3684,7 @@
           "type" : "array",
           "items" : "MLMetric"
         },
-        "doc" : "Metrics of the MLModel used in production",
+        "doc" : "Metrics of the MLModel used in production\r",
         "optional" : true
       }, {
         "name" : "mlFeatures",
@@ -3670,7 +3692,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.MLFeatureUrn"
         },
-        "doc" : "List of features used for MLModel training",
+        "doc" : "List of features used for MLModel training\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3685,7 +3707,7 @@
           "type" : "array",
           "items" : "string"
         },
-        "doc" : "Tags for the MLModel",
+        "doc" : "Tags for the MLModel\r",
         "default" : [ ]
       }, {
         "name" : "deployments",
@@ -3693,7 +3715,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "Deployments for the MLModel",
+        "doc" : "Deployments for the MLModel\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3707,11 +3729,11 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "List of jobs (if any) used to train the model",
+        "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
-            "entityTypes" : [ "dataJob" ],
+            "entityTypes" : [ "dataJob", "dataProcessInstance" ],
             "isLineage" : true,
             "name" : "TrainedBy"
           }
@@ -3722,7 +3744,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "List of jobs (if any) that use the model",
+        "doc" : "List of jobs (if any) that use the model\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
@@ -3738,7 +3760,7 @@
           "type" : "array",
           "items" : "com.linkedin.common.Urn"
         },
-        "doc" : "Groups the model belongs to",
+        "doc" : "Groups the model belongs to\r",
         "optional" : true,
         "Relationship" : {
           "/*" : {
diff --git a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
index 75e5c9a559076b..e375ac698ab516 100644
--- a/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
+++ b/metadata-service/restli-api/src/main/snapshot/com.linkedin.platform.platform.snapshot.json
@@ -3978,12 +3978,23 @@
                   "type" : "record",
                   "name" : "MLModelProperties",
                   "namespace" : "com.linkedin.ml.metadata",
-                  "doc" : "Properties associated with a ML Model",
+                  "doc" : "Properties associated with a ML Model\r",
                   "include" : [ "com.linkedin.common.CustomProperties", "com.linkedin.common.ExternalReference" ],
                   "fields" : [ {
+                    "name" : "name",
+                    "type" : "string",
+                    "doc" : "Display name of the MLModel\r",
+                    "optional" : true,
+                    "Searchable" : {
+                      "boostScore" : 10.0,
+                      "enableAutocomplete" : true,
+                      "fieldType" : "WORD_GRAM",
+                      "queryByDefault" : true
+                    }
+                  }, {
                     "name" : "description",
                     "type" : "string",
-                    "doc" : "Documentation of the MLModel",
+                    "doc" : "Documentation of the MLModel\r",
                     "optional" : true,
                     "Searchable" : {
                       "fieldType" : "TEXT",
@@ -3992,17 +4003,28 @@
                   }, {
                     "name" : "date",
                     "type" : "com.linkedin.common.Time",
-                    "doc" : "Date when the MLModel was developed",
+                    "doc" : "Date when the MLModel was developed\r",
+                    "optional" : true,
+                    "deprecated" : true
+                  }, {
+                    "name" : "created",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Audit stamp containing who created this and when\r",
+                    "optional" : true
+                  }, {
+                    "name" : "lastModified",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Date when the MLModel was last modified\r",
                     "optional" : true
                   }, {
                     "name" : "version",
                     "type" : "com.linkedin.common.VersionTag",
-                    "doc" : "Version of the MLModel",
+                    "doc" : "Version of the MLModel\r",
                     "optional" : true
                   }, {
                     "name" : "type",
                     "type" : "string",
-                    "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc",
+                    "doc" : "Type of Algorithm or MLModel such as whether it is a Naive Bayes classifier, Convolutional Neural Network, etc\r",
                     "optional" : true,
                     "Searchable" : {
                       "fieldType" : "TEXT_PARTIAL"
@@ -4018,7 +4040,7 @@
                         "ref" : [ "string", "int", "float", "double", "boolean" ]
                       }
                     },
-                    "doc" : "Hyper Parameters of the MLModel\n\nNOTE: these are deprecated in favor of hyperParams",
+                    "doc" : "Hyper Parameters of the MLModel\r\n\r\nNOTE: these are deprecated in favor of hyperParams\r",
                     "optional" : true
                   }, {
                     "name" : "hyperParams",
@@ -4053,7 +4075,7 @@
                         }
                       }
                     },
-                    "doc" : "Hyperparameters of the MLModel",
+                    "doc" : "Hyperparameters of the MLModel\r",
                     "optional" : true
                   }, {
                     "name" : "trainingMetrics",
@@ -4088,7 +4110,7 @@
                         }
                       }
                     },
-                    "doc" : "Metrics of the MLModel used in training",
+                    "doc" : "Metrics of the MLModel used in training\r",
                     "optional" : true
                   }, {
                     "name" : "onlineMetrics",
@@ -4096,7 +4118,7 @@
                       "type" : "array",
                       "items" : "MLMetric"
                     },
-                    "doc" : "Metrics of the MLModel used in production",
+                    "doc" : "Metrics of the MLModel used in production\r",
                     "optional" : true
                   }, {
                     "name" : "mlFeatures",
@@ -4104,7 +4126,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.MLFeatureUrn"
                     },
-                    "doc" : "List of features used for MLModel training",
+                    "doc" : "List of features used for MLModel training\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4119,7 +4141,7 @@
                       "type" : "array",
                       "items" : "string"
                     },
-                    "doc" : "Tags for the MLModel",
+                    "doc" : "Tags for the MLModel\r",
                     "default" : [ ]
                   }, {
                     "name" : "deployments",
@@ -4127,7 +4149,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "Deployments for the MLModel",
+                    "doc" : "Deployments for the MLModel\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4141,11 +4163,11 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "List of jobs (if any) used to train the model",
+                    "doc" : "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
-                        "entityTypes" : [ "dataJob" ],
+                        "entityTypes" : [ "dataJob", "dataProcessInstance" ],
                         "isLineage" : true,
                         "name" : "TrainedBy"
                       }
@@ -4156,7 +4178,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "List of jobs (if any) that use the model",
+                    "doc" : "List of jobs (if any) that use the model\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4172,7 +4194,7 @@
                       "type" : "array",
                       "items" : "com.linkedin.common.Urn"
                     },
-                    "doc" : "Groups the model belongs to",
+                    "doc" : "Groups the model belongs to\r",
                     "optional" : true,
                     "Relationship" : {
                       "/*" : {
@@ -4975,12 +4997,23 @@
                   "type" : "record",
                   "name" : "MLModelGroupProperties",
                   "namespace" : "com.linkedin.ml.metadata",
-                  "doc" : "Properties associated with an ML Model Group",
+                  "doc" : "Properties associated with an ML Model Group\r",
                   "include" : [ "com.linkedin.common.CustomProperties" ],
                   "fields" : [ {
+                    "name" : "name",
+                    "type" : "string",
+                    "doc" : "Display name of the MLModelGroup\r",
+                    "optional" : true,
+                    "Searchable" : {
+                      "boostScore" : 10.0,
+                      "enableAutocomplete" : true,
+                      "fieldType" : "WORD_GRAM",
+                      "queryByDefault" : true
+                    }
+                  }, {
                     "name" : "description",
                     "type" : "string",
-                    "doc" : "Documentation of the MLModelGroup",
+                    "doc" : "Documentation of the MLModelGroup\r",
                     "optional" : true,
                     "Searchable" : {
                       "fieldType" : "TEXT",
@@ -4989,12 +5022,38 @@
                   }, {
                     "name" : "createdAt",
                     "type" : "com.linkedin.common.Time",
-                    "doc" : "Date when the MLModelGroup was developed",
+                    "doc" : "Date when the MLModelGroup was developed\r",
+                    "optional" : true,
+                    "deprecated" : true
+                  }, {
+                    "name" : "created",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Time and Actor who created the MLModelGroup\r",
+                    "optional" : true
+                  }, {
+                    "name" : "lastModified",
+                    "type" : "com.linkedin.common.TimeStamp",
+                    "doc" : "Date when the MLModelGroup was last modified\r",
                     "optional" : true
+                  }, {
+                    "name" : "trainingJobs",
+                    "type" : {
+                      "type" : "array",
+                      "items" : "com.linkedin.common.Urn"
+                    },
+                    "doc" : "List of jobs (if any) used to train the model group. Visible in Lineage.\r",
+                    "optional" : true,
+                    "Relationship" : {
+                      "/*" : {
+                        "entityTypes" : [ "dataJob" ],
+                        "isLineage" : true,
+                        "name" : "TrainedBy"
+                      }
+                    }
                   }, {
                     "name" : "version",
                     "type" : "com.linkedin.common.VersionTag",
-                    "doc" : "Version of the MLModelGroup",
+                    "doc" : "Version of the MLModelGroup\r",
                     "optional" : true
                   } ],
                   "Aspect" : {

From 09a9b6eef912d8f855a2cc6fdc03032f5ec7a652 Mon Sep 17 00:00:00 2001
From: Andrew Sikowitz <andrew.sikowitz@acryl.io>
Date: Mon, 23 Dec 2024 22:39:57 -0800
Subject: [PATCH 2/2] feat(ingest/looker): Do not emit usage for non-ingested
 dashboards and charts (#11647)

---
 .../ingestion/source/looker/looker_common.py  |   9 +
 .../ingestion/source/looker/looker_source.py  |  22 +-
 .../ingestion/source/looker/looker_usage.py   |  40 +-
 .../looker/looker_mces_usage_history.json     | 364 +++++++++++++++++-
 .../tests/integration/looker/test_looker.py   |  87 ++++-
 5 files changed, 482 insertions(+), 40 deletions(-)

diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
index a66962f962255f..1183916e9b3fef 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_common.py
@@ -1408,6 +1408,15 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport):
     dashboards_with_activity: LossySet[str] = dataclasses_field(
         default_factory=LossySet
     )
+
+    # Entities that have usage data but were not ingested in this run, so no usage aspects are emitted for them
+    dashboards_skipped_for_usage: LossySet[str] = dataclasses_field(
+        default_factory=LossySet
+    )
+    charts_skipped_for_usage: LossySet[str] = dataclasses_field(
+        default_factory=LossySet
+    )
+
     stage_latency: List[StageLatency] = dataclasses_field(default_factory=list)
     _looker_explore_registry: Optional[LookerExploreRegistry] = None
     total_explores: int = 0
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
index 815c5dfb1c0147..8487d5113bc1d3 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_source.py
@@ -68,6 +68,7 @@
     ViewField,
     ViewFieldType,
     gen_model_key,
+    get_urn_looker_element_id,
 )
 from datahub.ingestion.source.looker.looker_config import LookerDashboardSourceConfig
 from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
@@ -165,6 +166,9 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext):
         # Required, as we do not ingest all folders but only those that have dashboards/looks
         self.processed_folders: List[str] = []
 
+        # Keep track of ingested chart urns, to omit usage for non-ingested entities
+        self.chart_urns: Set[str] = set()
+
     @staticmethod
     def test_connection(config_dict: dict) -> TestConnectionReport:
         test_report = TestConnectionReport()
@@ -642,6 +646,7 @@ def _make_chart_metadata_events(
         chart_urn = self._make_chart_urn(
             element_id=dashboard_element.get_urn_element_id()
         )
+        self.chart_urns.add(chart_urn)
         chart_snapshot = ChartSnapshot(
             urn=chart_urn,
             aspects=[Status(removed=False)],
@@ -1380,7 +1385,9 @@ def _get_folder_and_ancestors_workunits(
         yield from self._emit_folder_as_container(folder)
 
     def extract_usage_stat(
-        self, looker_dashboards: List[looker_usage.LookerDashboardForUsage]
+        self,
+        looker_dashboards: List[looker_usage.LookerDashboardForUsage],
+        ingested_chart_urns: Set[str],
     ) -> List[MetadataChangeProposalWrapper]:
         looks: List[looker_usage.LookerChartForUsage] = []
         # filter out look from all dashboard
@@ -1391,6 +1398,15 @@ def extract_usage_stat(
 
         # dedup looks
         looks = list({str(look.id): look for look in looks}.values())
+        filtered_looks = []
+        for look in looks:
+            if not look.id:
+                continue
+            chart_urn = self._make_chart_urn(get_urn_looker_element_id(look.id))
+            if chart_urn in ingested_chart_urns:
+                filtered_looks.append(look)
+            else:
+                self.reporter.charts_skipped_for_usage.add(look.id)
 
         # Keep stat generators to generate entity stat aspect later
         stat_generator_config: looker_usage.StatGeneratorConfig = (
@@ -1414,7 +1430,7 @@ def extract_usage_stat(
             stat_generator_config,
             self.reporter,
             self._make_chart_urn,
-            looks,
+            filtered_looks,
         )
 
         mcps: List[MetadataChangeProposalWrapper] = []
@@ -1669,7 +1685,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         if self.source_config.extract_usage_history:
             self.reporter.report_stage_start("usage_extraction")
             usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
-                looker_dashboards_for_usage
+                looker_dashboards_for_usage, self.chart_urns
             )
             for usage_mcp in usage_mcps:
                 yield usage_mcp.as_workunit()
diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
index ef7d64e4f42d43..098d7d73a3da84 100644
--- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
+++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_usage.py
@@ -42,6 +42,7 @@
     TimeWindowSizeClass,
     _Aspect as AspectAbstract,
 )
+from datahub.utilities.lossy_collections import LossySet
 
 logger = logging.getLogger(__name__)
 
@@ -170,7 +171,7 @@ def __init__(
         self.config = config
         self.looker_models = looker_models
         # Later it will help to find out for what are the looker entities from query result
-        self.id_vs_model: Dict[str, ModelForUsage] = {
+        self.id_to_model: Dict[str, ModelForUsage] = {
             self.get_id(looker_object): looker_object for looker_object in looker_models
         }
         self.post_filter = len(self.looker_models) > 100
@@ -225,6 +226,10 @@ def get_id(self, looker_object: ModelForUsage) -> str:
     def get_id_from_row(self, row: dict) -> str:
         pass
 
+    @abstractmethod
+    def report_skip_set(self) -> LossySet[str]:
+        pass
+
     def create_mcp(
         self, model: ModelForUsage, aspect: Aspect
     ) -> MetadataChangeProposalWrapper:
@@ -258,20 +263,11 @@ def _process_entity_timeseries_rows(
 
         return entity_stat_aspect
 
-    def _process_absolute_aspect(self) -> List[Tuple[ModelForUsage, AspectAbstract]]:
-        aspects: List[Tuple[ModelForUsage, AspectAbstract]] = []
-        for looker_object in self.looker_models:
-            aspects.append(
-                (looker_object, self.to_entity_absolute_stat_aspect(looker_object))
-            )
-
-        return aspects
-
     def _fill_user_stat_aspect(
         self,
         entity_usage_stat: Dict[Tuple[str, str], Aspect],
         user_wise_rows: List[Dict],
-    ) -> Iterable[Tuple[ModelForUsage, Aspect]]:
+    ) -> Iterable[Tuple[str, Aspect]]:
         logger.debug("Entering fill user stat aspect")
 
         # We first resolve all the users using a threadpool to warm up the cache
@@ -300,7 +296,7 @@ def _fill_user_stat_aspect(
 
         for row in user_wise_rows:
             # Confirm looker object was given for stat generation
-            looker_object = self.id_vs_model.get(self.get_id_from_row(row))
+            looker_object = self.id_to_model.get(self.get_id_from_row(row))
             if looker_object is None:
                 logger.warning(
                     "Looker object with id({}) was not register with stat generator".format(
@@ -338,7 +334,7 @@ def _fill_user_stat_aspect(
         logger.debug("Starting to yield answers for user-wise counts")
 
         for (id, _), aspect in entity_usage_stat.items():
-            yield self.id_vs_model[id], aspect
+            yield id, aspect
 
     def _execute_query(self, query: LookerQuery, query_name: str) -> List[Dict]:
         rows = []
@@ -357,7 +353,7 @@ def _execute_query(self, query: LookerQuery, query_name: str) -> List[Dict]:
             )
             if self.post_filter:
                 logger.debug("post filtering")
-                rows = [r for r in rows if self.get_id_from_row(r) in self.id_vs_model]
+                rows = [r for r in rows if self.get_id_from_row(r) in self.id_to_model]
                 logger.debug("Filtered down to %d rows", len(rows))
         except Exception as e:
             logger.warning(f"Failed to execute {query_name} query: {e}")
@@ -378,7 +374,8 @@ def generate_usage_stat_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
             return
 
         # yield absolute stat for looker entities
-        for looker_object, aspect in self._process_absolute_aspect():  # type: ignore
+        for looker_object in self.looker_models:
+            aspect = self.to_entity_absolute_stat_aspect(looker_object)
             yield self.create_mcp(looker_object, aspect)
 
         # Execute query and process the raw json which contains stat information
@@ -399,10 +396,13 @@ def generate_usage_stat_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
         )
         user_wise_rows = self._execute_query(user_wise_query_with_filters, "user_query")
         # yield absolute stat for entity
-        for looker_object, aspect in self._fill_user_stat_aspect(
+        for object_id, aspect in self._fill_user_stat_aspect(
             entity_usage_stat, user_wise_rows
         ):
-            yield self.create_mcp(looker_object, aspect)
+            if object_id in self.id_to_model:
+                yield self.create_mcp(self.id_to_model[object_id], aspect)
+            else:
+                self.report_skip_set().add(object_id)
 
 
 class DashboardStatGenerator(BaseStatGenerator):
@@ -425,6 +425,9 @@ def __init__(
     def get_stats_generator_name(self) -> str:
         return "DashboardStats"
 
+    def report_skip_set(self) -> LossySet[str]:
+        return self.report.dashboards_skipped_for_usage
+
     def get_filter(self) -> Dict[ViewField, str]:
         return {
             HistoryViewField.HISTORY_DASHBOARD_ID: ",".join(
@@ -541,6 +544,9 @@ def __init__(
     def get_stats_generator_name(self) -> str:
         return "ChartStats"
 
+    def report_skip_set(self) -> LossySet[str]:
+        return self.report.charts_skipped_for_usage
+
     def get_filter(self) -> Dict[ViewField, str]:
         return {
             LookViewField.LOOK_ID: ",".join(
diff --git a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json
index 594983c8fb0f2a..ed0c5401c9029f 100644
--- a/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json
+++ b/metadata-ingestion/tests/integration/looker/looker_mces_usage_history.json
@@ -1,4 +1,66 @@
 [
+{
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.ChartSnapshot": {
+            "urn": "urn:li:chart:(looker,dashboard_elements.3)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.common.Status": {
+                        "removed": false
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.chart.ChartInfo": {
+                        "customProperties": {
+                            "upstream_fields": ""
+                        },
+                        "title": "",
+                        "description": "",
+                        "lastModified": {
+                            "created": {
+                                "time": 0,
+                                "actor": "urn:li:corpuser:unknown"
+                            },
+                            "lastModified": {
+                                "time": 0,
+                                "actor": "urn:li:corpuser:unknown"
+                            }
+                        },
+                        "chartUrl": "https://looker.company.com/x/",
+                        "inputs": [
+                            {
+                                "string": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)"
+                            }
+                        ]
+                    }
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(looker,dashboard_elements.3)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "Look"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "proposedSnapshot": {
         "com.linkedin.pegasus2avro.metadata.snapshot.DashboardSnapshot": {
@@ -9,7 +71,9 @@
                         "customProperties": {},
                         "title": "foo",
                         "description": "lorem ipsum",
-                        "charts": [],
+                        "charts": [
+                            "urn:li:chart:(looker,dashboard_elements.3)"
+                        ],
                         "datasets": [],
                         "dashboards": [],
                         "lastModified": {
@@ -89,6 +153,22 @@
         "lastRunId": "no-run-id-provided"
     }
 },
+{
+    "entityType": "chart",
+    "entityUrn": "urn:li:chart:(looker,dashboard_elements.3)",
+    "changeType": "UPSERT",
+    "aspectName": "inputFields",
+    "aspect": {
+        "json": {
+            "fields": []
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "entityType": "dashboard",
     "entityUrn": "urn:li:dashboard:(looker,dashboards.1)",
@@ -215,6 +295,98 @@
         "lastRunId": "no-run-id-provided"
     }
 },
+{
+    "entityType": "container",
+    "entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
+    "changeType": "UPSERT",
+    "aspectName": "containerProperties",
+    "aspect": {
+        "json": {
+            "customProperties": {
+                "platform": "looker",
+                "env": "PROD",
+                "model_name": "look_data"
+            },
+            "name": "look_data",
+            "env": "PROD"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "container",
+    "entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
+    "changeType": "UPSERT",
+    "aspectName": "status",
+    "aspect": {
+        "json": {
+            "removed": false
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "container",
+    "entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
+    "changeType": "UPSERT",
+    "aspectName": "dataPlatformInstance",
+    "aspect": {
+        "json": {
+            "platform": "urn:li:dataPlatform:looker"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "container",
+    "entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "LookML Model"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "container",
+    "entityUrn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
+    "changeType": "UPSERT",
+    "aspectName": "browsePathsV2",
+    "aspect": {
+        "json": {
+            "path": [
+                {
+                    "id": "Explore"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "proposedSnapshot": {
         "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
@@ -389,6 +561,180 @@
         "lastRunId": "no-run-id-provided"
     }
 },
+{
+    "proposedSnapshot": {
+        "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
+            "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
+            "aspects": [
+                {
+                    "com.linkedin.pegasus2avro.common.BrowsePaths": {
+                        "paths": [
+                            "/Explore/look_data"
+                        ]
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.common.Status": {
+                        "removed": false
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.dataset.DatasetProperties": {
+                        "customProperties": {
+                            "project": "lkml_samples",
+                            "model": "look_data",
+                            "looker.explore.label": "My Explore View",
+                            "looker.explore.name": "look_view",
+                            "looker.explore.file": "test_source_file.lkml"
+                        },
+                        "externalUrl": "https://looker.company.com/explore/look_data/look_view",
+                        "name": "My Explore View",
+                        "description": "lorem ipsum",
+                        "tags": []
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.dataset.UpstreamLineage": {
+                        "upstreams": [
+                            {
+                                "auditStamp": {
+                                    "time": 1586847600000,
+                                    "actor": "urn:li:corpuser:datahub"
+                                },
+                                "dataset": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.underlying_view,PROD)",
+                                "type": "VIEW"
+                            }
+                        ]
+                    }
+                },
+                {
+                    "com.linkedin.pegasus2avro.schema.SchemaMetadata": {
+                        "schemaName": "look_view",
+                        "platform": "urn:li:dataPlatform:looker",
+                        "version": 0,
+                        "created": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown"
+                        },
+                        "lastModified": {
+                            "time": 0,
+                            "actor": "urn:li:corpuser:unknown"
+                        },
+                        "hash": "",
+                        "platformSchema": {
+                            "com.linkedin.pegasus2avro.schema.OtherSchema": {
+                                "rawSchema": ""
+                            }
+                        },
+                        "fields": [
+                            {
+                                "fieldPath": "dim1",
+                                "nullable": false,
+                                "description": "dimension one description",
+                                "label": "Dimensions One Label",
+                                "type": {
+                                    "type": {
+                                        "com.linkedin.pegasus2avro.schema.StringType": {}
+                                    }
+                                },
+                                "nativeDataType": "string",
+                                "recursive": false,
+                                "globalTags": {
+                                    "tags": [
+                                        {
+                                            "tag": "urn:li:tag:Dimension"
+                                        }
+                                    ]
+                                },
+                                "isPartOfKey": false
+                            }
+                        ],
+                        "primaryKeys": []
+                    }
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "subTypes",
+    "aspect": {
+        "json": {
+            "typeNames": [
+                "Explore"
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "embed",
+    "aspect": {
+        "json": {
+            "renderUrl": "https://looker.company.com/embed/explore/look_data/look_view"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "container",
+    "aspect": {
+        "json": {
+            "container": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb"
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
+{
+    "entityType": "dataset",
+    "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,look_data.explore.look_view,PROD)",
+    "changeType": "UPSERT",
+    "aspectName": "browsePathsV2",
+    "aspect": {
+        "json": {
+            "path": [
+                {
+                    "id": "Explore"
+                },
+                {
+                    "id": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb",
+                    "urn": "urn:li:container:a2a7aa63752695f9a1705faed9d03ffb"
+                }
+            ]
+        }
+    },
+    "systemMetadata": {
+        "lastObserved": 1586847600000,
+        "runId": "looker-test",
+        "lastRunId": "no-run-id-provided"
+    }
+},
 {
     "proposedSnapshot": {
         "com.linkedin.pegasus2avro.metadata.snapshot.TagSnapshot": {
@@ -747,22 +1093,6 @@
         "lastRunId": "no-run-id-provided"
     }
 },
-{
-    "entityType": "chart",
-    "entityUrn": "urn:li:chart:(looker,dashboard_elements.3)",
-    "changeType": "UPSERT",
-    "aspectName": "status",
-    "aspect": {
-        "json": {
-            "removed": false
-        }
-    },
-    "systemMetadata": {
-        "lastObserved": 1586847600000,
-        "runId": "looker-test",
-        "lastRunId": "no-run-id-provided"
-    }
-},
 {
     "entityType": "tag",
     "entityUrn": "urn:li:tag:Dimension",
diff --git a/metadata-ingestion/tests/integration/looker/test_looker.py b/metadata-ingestion/tests/integration/looker/test_looker.py
index a39de8384efb23..c96bcc729a95da 100644
--- a/metadata-ingestion/tests/integration/looker/test_looker.py
+++ b/metadata-ingestion/tests/integration/looker/test_looker.py
@@ -31,7 +31,10 @@
 from datahub.ingestion.api.source import SourceReport
 from datahub.ingestion.run.pipeline import Pipeline, PipelineInitError
 from datahub.ingestion.source.looker import looker_common, looker_usage
-from datahub.ingestion.source.looker.looker_common import LookerExplore
+from datahub.ingestion.source.looker.looker_common import (
+    LookerDashboardSourceReport,
+    LookerExplore,
+)
 from datahub.ingestion.source.looker.looker_config import LookerCommonConfig
 from datahub.ingestion.source.looker.looker_lib_wrapper import (
     LookerAPI,
@@ -414,7 +417,9 @@ def setup_mock_dashboard_multiple_charts(mocked_client):
     )
 
 
-def setup_mock_dashboard_with_usage(mocked_client):
+def setup_mock_dashboard_with_usage(
+    mocked_client: mock.MagicMock, skip_look: bool = False
+) -> None:
     mocked_client.all_dashboards.return_value = [Dashboard(id="1")]
     mocked_client.dashboard.return_value = Dashboard(
         id="1",
@@ -437,7 +442,13 @@ def setup_mock_dashboard_with_usage(mocked_client):
                 ),
             ),
             DashboardElement(
-                id="3", type="", look=LookWithQuery(id="3", view_count=30)
+                id="3",
+                type="" if skip_look else "vis",  # Looks only ingested if type == `vis`
+                look=LookWithQuery(
+                    id="3",
+                    view_count=30,
+                    query=Query(model="look_data", view="look_view"),
+                ),
             ),
         ],
     )
@@ -611,6 +622,12 @@ def side_effect_query_inline(
                     HistoryViewField.HISTORY_DASHBOARD_USER: 1,
                     HistoryViewField.HISTORY_DASHBOARD_RUN_COUNT: 5,
                 },
+                {
+                    HistoryViewField.HISTORY_DASHBOARD_ID: "5",
+                    HistoryViewField.HISTORY_CREATED_DATE: "2022-07-07",
+                    HistoryViewField.HISTORY_DASHBOARD_USER: 1,
+                    HistoryViewField.HISTORY_DASHBOARD_RUN_COUNT: 5,
+                },
             ]
         ),
         looker_usage.QueryId.DASHBOARD_PER_USER_PER_DAY_USAGE_STAT: json.dumps(
@@ -790,6 +807,70 @@ def test_looker_ingest_usage_history(pytestconfig, tmp_path, mock_time):
         )
 
 
+@freeze_time(FROZEN_TIME)
+def test_looker_filter_usage_history(pytestconfig, tmp_path, mock_time):
+    mocked_client = mock.MagicMock()
+    with mock.patch("looker_sdk.init40") as mock_sdk:
+        mock_sdk.return_value = mocked_client
+        setup_mock_dashboard_with_usage(mocked_client, skip_look=True)
+        mocked_client.run_inline_query.side_effect = side_effect_query_inline
+        setup_mock_explore(mocked_client)
+        setup_mock_user(mocked_client)
+
+        temp_output_file = f"{tmp_path}/looker_mces.json"
+        pipeline = Pipeline.create(
+            {
+                "run_id": "looker-test",
+                "source": {
+                    "type": "looker",
+                    "config": {
+                        "base_url": "https://looker.company.com",
+                        "client_id": "foo",
+                        "client_secret": "bar",
+                        "extract_usage_history": True,
+                        "max_threads": 1,
+                    },
+                },
+                "sink": {
+                    "type": "file",
+                    "config": {
+                        "filename": temp_output_file,
+                    },
+                },
+            }
+        )
+        pipeline.run()
+        pipeline.pretty_print_summary()
+        pipeline.raise_from_status()
+
+        # Expect 4 dashboardUsageStatistics aspects: 1 absolute + 3 timeseries
+        dashboard_usage_aspect_count = 0
+        # Expect 0 chartUsageStatistics: look "3" is not ingested, so usage is dropped
+        chart_usage_aspect_count = 0
+        with open(temp_output_file) as f:
+            temp_output_dict = json.load(f)
+            for element in temp_output_dict:
+                if (
+                    element.get("entityType") == "dashboard"
+                    and element.get("aspectName") == "dashboardUsageStatistics"
+                ):
+                    dashboard_usage_aspect_count = dashboard_usage_aspect_count + 1
+                if (
+                    element.get("entityType") == "chart"
+                    and element.get("aspectName") == "chartUsageStatistics"
+                ):
+                    chart_usage_aspect_count = chart_usage_aspect_count + 1
+
+        assert dashboard_usage_aspect_count == 4
+        assert chart_usage_aspect_count == 0
+
+        source_report = cast(LookerDashboardSourceReport, pipeline.source.get_report())
+        # Dashboard "5" appears only in the mocked timeseries usage query result
+        assert str(source_report.dashboards_skipped_for_usage) == str(["5"])
+        # Look "3" comes from the dashboard element mocked with skip_look=True
+        assert str(source_report.charts_skipped_for_usage) == str(["3"])
+
+
 @freeze_time(FROZEN_TIME)
 def test_looker_ingest_stateful(pytestconfig, tmp_path, mock_time, mock_datahub_graph):
     output_file_name: str = "looker_mces.json"