Skip to content

Commit

Permalink
Merge branch 'master' into feature/cus-3546-fix-tableau-authentication
Browse files Browse the repository at this point in the history
  • Loading branch information
sgomezvillamor authored Dec 24, 2024
2 parents 2e9ce71 + 09a9b6e commit 7a03ddf
Show file tree
Hide file tree
Showing 17 changed files with 1,050 additions and 135 deletions.
196 changes: 193 additions & 3 deletions datahub-graphql-core/src/main/resources/entity.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,16 @@ type Query {
Fetch all Business Attributes
"""
listBusinessAttributes(input: ListBusinessAttributesInput!): ListBusinessAttributesResult

"""
Fetch a Data Process Instance by primary key (urn)
"""
dataProcessInstance(urn: String!): DataProcessInstance


}


"""
An ERModelRelationship is a high-level abstraction that describes which dataset fields are related to one another.
"""
Expand Down Expand Up @@ -9832,15 +9840,45 @@ type MLModelGroup implements EntityWithRelationships & Entity & BrowsableEntity
privileges: EntityPrivileges
}

"""
Properties describing a group of related ML models
"""
type MLModelGroupProperties {
"""
Display name of the model group
"""
name: String

"""
Detailed description of the model group's purpose and contents
"""
description: String

createdAt: Long
"""
When this model group was created
"""
created: AuditStamp

"""
When this model group was last modified
"""
lastModified: AuditStamp

"""
Version identifier for this model group
"""
version: VersionTag

"""
Custom key-value properties for the model group
"""
customProperties: [CustomPropertiesEntry!]

"""
Deprecated creation timestamp
@deprecated Use the 'created' field instead
"""
createdAt: Long @deprecated(reason: "Use `created` instead")
}

"""
Expand Down Expand Up @@ -9990,40 +10028,103 @@ description: String
}

"""
A single named metric, with an optional description, its value, and the time it was recorded
"""
type MLMetric {
"""
Name of the metric (e.g. accuracy, precision, recall)
"""
name: String

"""
Description of what this metric measures
"""
description: String

"""
The computed value of the metric
"""
value: String

"""
Timestamp when this metric was recorded
"""
createdAt: Long
}

type MLModelProperties {
"""
The display name of the model used in the UI
"""
name: String!

"""
Detailed description of the model's purpose and characteristics
"""
description: String

date: Long
"""
When the model was last modified
"""
lastModified: AuditStamp

"""
Version identifier for this model
"""
version: String

"""
The type/category of ML model (e.g. classification, regression)
"""
type: String

"""
Mapping of hyperparameter configurations
"""
hyperParameters: HyperParameterMap

hyperParams: [MLHyperParam]
"""
List of hyperparameter settings used to train this model
"""
hyperParams: [MLHyperParam]

"""
Performance metrics from model training
"""
trainingMetrics: [MLMetric]

"""
Names of ML features used by this model
"""
mlFeatures: [String!]

"""
Tags for categorizing and searching models
"""
tags: [String!]

"""
Model groups this model belongs to
"""
groups: [MLModelGroup]

"""
Additional custom properties specific to this model
"""
customProperties: [CustomPropertiesEntry!]

"""
URL to view this model in external system
"""
externalUrl: String

"""
When this model was created
"""
created: AuditStamp

"""
Deprecated timestamp for model creation
@deprecated Use 'created' field instead
"""
date: Long @deprecated(reason: "Use `created` instead")
}

type MLFeatureProperties {
Expand Down Expand Up @@ -12804,3 +12905,92 @@ type CronSchedule {
"""
timezone: String!
}


"""
Properties describing a data process instance's execution metadata
"""
type DataProcessInstanceProperties {
"""
The display name of this process instance
"""
name: String!

"""
URL to view this process instance in the external system
"""
externalUrl: String

"""
When this process instance was created
"""
created: AuditStamp

"""
Additional custom properties specific to this process instance
"""
customProperties: [CustomPropertiesEntry!]
}

"""
Properties specific to an ML model training run instance
"""
type MLTrainingRunProperties {
"""
Unique identifier for this training run
"""
id: String

"""
List of URLs to access training run outputs (e.g. model artifacts, logs)
"""
outputUrls: [String]

"""
Hyperparameters used in this training run
"""
hyperParams: [MLHyperParam]

"""
Performance metrics recorded during this training run
"""
trainingMetrics: [MLMetric]
}

extend type DataProcessInstance {

"""
Additional read only properties associated with the Data Process Instance
"""
properties: DataProcessInstanceProperties

"""
The specific instance of the data platform that this entity belongs to
"""
dataPlatformInstance: DataPlatformInstance

"""
Sub Types that this entity implements
"""
subTypes: SubTypes

"""
The parent container in which the entity resides
"""
container: Container

"""
Standardized platform urn where the data process instance is defined
"""
platform: DataPlatform!

"""
Recursively get the lineage of containers for this entity
"""
parentContainers: ParentContainersResult

"""
Additional properties when subtype is Training Run
"""
mlTrainingRunProperties: MLTrainingRunProperties
}
Original file line number Diff line number Diff line change
Expand Up @@ -1408,6 +1408,15 @@ class LookerDashboardSourceReport(StaleEntityRemovalSourceReport):
dashboards_with_activity: LossySet[str] = dataclasses_field(
default_factory=LossySet
)

# Entities that don't seem to exist, so we don't emit usage aspects for them despite having usage data
dashboards_skipped_for_usage: LossySet[str] = dataclasses_field(
default_factory=LossySet
)
charts_skipped_for_usage: LossySet[str] = dataclasses_field(
default_factory=LossySet
)

stage_latency: List[StageLatency] = dataclasses_field(default_factory=list)
_looker_explore_registry: Optional[LookerExploreRegistry] = None
total_explores: int = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
ViewField,
ViewFieldType,
gen_model_key,
get_urn_looker_element_id,
)
from datahub.ingestion.source.looker.looker_config import LookerDashboardSourceConfig
from datahub.ingestion.source.looker.looker_lib_wrapper import LookerAPI
Expand Down Expand Up @@ -165,6 +166,9 @@ def __init__(self, config: LookerDashboardSourceConfig, ctx: PipelineContext):
# Required, as we do not ingest all folders but only those that have dashboards/looks
self.processed_folders: List[str] = []

# Keep track of ingested chart urns, to omit usage for non-ingested entities
self.chart_urns: Set[str] = set()

@staticmethod
def test_connection(config_dict: dict) -> TestConnectionReport:
test_report = TestConnectionReport()
Expand Down Expand Up @@ -642,6 +646,7 @@ def _make_chart_metadata_events(
chart_urn = self._make_chart_urn(
element_id=dashboard_element.get_urn_element_id()
)
self.chart_urns.add(chart_urn)
chart_snapshot = ChartSnapshot(
urn=chart_urn,
aspects=[Status(removed=False)],
Expand Down Expand Up @@ -1380,7 +1385,9 @@ def _get_folder_and_ancestors_workunits(
yield from self._emit_folder_as_container(folder)

def extract_usage_stat(
self, looker_dashboards: List[looker_usage.LookerDashboardForUsage]
self,
looker_dashboards: List[looker_usage.LookerDashboardForUsage],
ingested_chart_urns: Set[str],
) -> List[MetadataChangeProposalWrapper]:
looks: List[looker_usage.LookerChartForUsage] = []
# filter out looks from all dashboards
Expand All @@ -1391,6 +1398,15 @@ def extract_usage_stat(

# dedup looks
looks = list({str(look.id): look for look in looks}.values())
filtered_looks = []
for look in looks:
if not look.id:
continue
chart_urn = self._make_chart_urn(get_urn_looker_element_id(look.id))
if chart_urn in ingested_chart_urns:
filtered_looks.append(look)
else:
self.reporter.charts_skipped_for_usage.add(look.id)

# Keep stat generators to generate entity stat aspect later
stat_generator_config: looker_usage.StatGeneratorConfig = (
Expand All @@ -1414,7 +1430,7 @@ def extract_usage_stat(
stat_generator_config,
self.reporter,
self._make_chart_urn,
looks,
filtered_looks,
)

mcps: List[MetadataChangeProposalWrapper] = []
Expand Down Expand Up @@ -1669,7 +1685,7 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
if self.source_config.extract_usage_history:
self.reporter.report_stage_start("usage_extraction")
usage_mcps: List[MetadataChangeProposalWrapper] = self.extract_usage_stat(
looker_dashboards_for_usage
looker_dashboards_for_usage, self.chart_urns
)
for usage_mcp in usage_mcps:
yield usage_mcp.as_workunit()
Expand Down
Loading

0 comments on commit 7a03ddf

Please sign in to comment.