From f24e1041c5018b1ec2caf89ef5bce29f127f3302 Mon Sep 17 00:00:00 2001
From: Tapas Kumar Senapati
Date: Fri, 23 Sep 2022 14:44:55 +0530
Subject: [PATCH] Cml profile instrumentation (#45)

* Added cml instrumentation
---
 dbt/adapters/hive/cloudera_tracking.py | 21 +++++++++++++++++++++
 dbt/adapters/hive/connections.py       |  2 ++
 2 files changed, 23 insertions(+)

diff --git a/dbt/adapters/hive/cloudera_tracking.py b/dbt/adapters/hive/cloudera_tracking.py
index 1c06976..99391de 100644
--- a/dbt/adapters/hive/cloudera_tracking.py
+++ b/dbt/adapters/hive/cloudera_tracking.py
@@ -14,6 +14,7 @@
 
 import dbt.version
 import json
+import os
 import platform
 import requests
 import sys
@@ -41,6 +42,9 @@
 # Json object to store dbt profile(profile.yml) related information
 profile_info = {}
 
+# Json object to store cml environment variables
+cml_info = {}
+
 
 def populate_platform_info(cred: Credentials, ver):
     """
@@ -66,6 +70,22 @@ def populate_platform_info(cred: Credentials, ver):
     platform_info["dbt_adapter"] = f"{cred.type}-{ver.version}"
 
 
+def populate_cml_info():
+    """
+    populate cml environment variables if available to be passed on for tracking
+    """
+    default_value = ""  # if environment variables doesn't exist add empty string as default
+    cml_info["ml_runtime_edition"] = os.environ.get('ML_RUNTIME_EDITION', default_value)
+    cml_info["ml_runtime_git_hash"] = os.environ.get('ML_RUNTIME_GIT_HASH', default_value)
+    cml_info["ml_runtime_kernel"] = os.environ.get('ML_RUNTIME_KERNEL', default_value)
+    cml_info["ml_runtime_editor"] = os.environ.get('ML_RUNTIME_EDITOR', default_value)
+    cml_info["ml_runtime_gbn"] = os.environ.get('ML_RUNTIME_GBN', default_value)
+    cml_info["ml_runtime_full_version"] = os.environ.get('ML_RUNTIME_FULL_VERSION', default_value)
+    cml_info["ml_runtime_description"] = os.environ.get('ML_RUNTIME_DESCRIPTION', default_value)
+    cml_info["ml_runtime_maintenance_version"] = os.environ.get('ML_RUNTIME_MAINTENANCE_VERSION', default_value)
+    cml_info["ml_runtime_metadata_version"] = os.environ.get('ML_RUNTIME_METADATA_VERSION', default_value)
+
+
 def populate_unique_ids(cred: Credentials):
     host = str(cred.host).encode()
     user = str(cred.username).encode()
@@ -178,6 +198,7 @@ def track_usage(tracking_payload):
     # inject other static payload to tracking_payload
     tracking_payload = _merge_keys(unique_ids, tracking_payload)
     tracking_payload = _merge_keys(platform_info, tracking_payload)
+    tracking_payload = _merge_keys(cml_info, tracking_payload)
     tracking_payload = _merge_keys(profile_info, tracking_payload)
 
     # form the tracking data
diff --git a/dbt/adapters/hive/connections.py b/dbt/adapters/hive/connections.py
index e98c1c7..6997932 100644
--- a/dbt/adapters/hive/connections.py
+++ b/dbt/adapters/hive/connections.py
@@ -94,6 +94,8 @@ def __post_init__(self):
         tracker.usage_tracking = self.usage_tracking
         # get platform information for tracking
         tracker.populate_platform_info(self, ver)
+        # get cml information for tracking
+        tracker.populate_cml_info()
         # generate unique ids for tracking
         tracker.populate_unique_ids(self)
 