diff --git a/README.md b/README.md index edcb27e1..6dca611c 100644 --- a/README.md +++ b/README.md @@ -76,6 +76,7 @@ Each connector is placed under its own directory under [metaphor](./metaphor) an | [postgresql.profile](metaphor/postgresql/profile/) | Data profile | | [postgresql.usage](metaphor/postgresql/usage/) | Usage | | [power_bi](metaphor/power_bi/) | Dashboard, lineage | +| [quick_sight](metaphor/quick_sight/) | Dashboard, lineage | | [redshift](metaphor/redshift/) | Schema, description, statistics, queries | | [redshift.profile](metaphor/redshift/profile/) | Data profile | | [sharepoint](metaphor/sharepoint/) | Document embeddings | diff --git a/metaphor/azure_data_factory/utils.py b/metaphor/azure_data_factory/utils.py index 174b14d6..0ac7bc1c 100644 --- a/metaphor/azure_data_factory/utils.py +++ b/metaphor/azure_data_factory/utils.py @@ -6,6 +6,7 @@ from metaphor.common.entity_id import dataset_normalized_name from metaphor.common.logger import get_logger +from metaphor.common.snowflake import normalize_snowflake_account from metaphor.common.utils import removesuffix from metaphor.models.metadata_change_event import ( DataPlatform, @@ -240,11 +241,9 @@ def process_snowflake_linked_service( query_db = parse_qs(url.query or "").get("db") database = query_db[0] if query_db else None - # extract snowflake account name from jdbc format, 'snowflake://.snowflakecomputing.com/' + # extract snowflake account name from jdbc format, 'snowflake:///' hostname = urlparse(url.path).hostname - snowflake_account = ( - removesuffix(hostname, ".snowflakecomputing.com") if hostname else None - ) + snowflake_account = normalize_snowflake_account(hostname) if hostname else None return LinkedService(database=database, account=snowflake_account) diff --git a/metaphor/common/entity_id.py b/metaphor/common/entity_id.py index a8777ebc..7ffc285a 100644 --- a/metaphor/common/entity_id.py +++ b/metaphor/common/entity_id.py @@ -103,6 +103,15 @@ def to_dashboard_entity_id_from_logical_id(logical_id: DashboardLogicalID) -> En return EntityId(EntityType.DASHBOARD, logical_id) +def to_entity_id_from_virtual_view_logical_id( + logical_id: VirtualViewLogicalID, +) -> EntityId: + """ + converts a VirtualView logical ID to entity ID + """ + return EntityId(EntityType.VIRTUAL_VIEW, logical_id) + + def normalize_full_dataset_name(name: str) -> str: """ Normalizes a fully qualified dataset name diff --git a/metaphor/common/snowflake.py b/metaphor/common/snowflake.py index e7a38000..138f5e17 100644 --- a/metaphor/common/snowflake.py +++ b/metaphor/common/snowflake.py @@ -1,17 +1,21 @@ PRIVATE_LINK_SUFFIX = ".privatelink" +SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com" -def normalize_snowflake_account(account: str) -> str: +def normalize_snowflake_account(host: str) -> str: """ Normalize different variations of Snowflake account. See https://docs.snowflake.com/en/user-guide/admin-account-identifier """ # Account name is case insensitive - account = account.lower() + host = host.lower() + + if host.endswith(SNOWFLAKE_HOST_SUFFIX): + host = host[: -len(SNOWFLAKE_HOST_SUFFIX)] # Strip PrivateLink suffix - if account.endswith(PRIVATE_LINK_SUFFIX): - return account[: -len(PRIVATE_LINK_SUFFIX)] + if host.endswith(PRIVATE_LINK_SUFFIX): + return host[: -len(PRIVATE_LINK_SUFFIX)] - return account + return host diff --git a/metaphor/fivetran/extractor.py b/metaphor/fivetran/extractor.py index 859488ca..679620a1 100644 --- a/metaphor/fivetran/extractor.py +++ b/metaphor/fivetran/extractor.py @@ -400,9 +400,7 @@ def get_snowflake_account_from_config(config: dict) -> Optional[str]: if host is None: return None - # remove snowflakecomputing.com parts - account = ".".join(host.split(".")[:-2]) - return normalize_snowflake_account(account) + return normalize_snowflake_account(host) @staticmethod def get_source_account_from_config( diff --git a/metaphor/quick_sight/README.md b/metaphor/quick_sight/README.md new file mode 100644 index 00000000..c8a6d2be --- /dev/null +++ b/metaphor/quick_sight/README.md @@ -0,0 +1,74 @@ +# QuickSight Connector + +This connector extracts technical metadata from AWS QuickSight using the [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) library. + +## Setup + +We recommend creating a dedicated AWS IAM user for the crawler with limited permissions based on the following IAM policy: + +``` json +{ + "Version": "2012-10-17", + "Statement": + [ + { + "Effect": "Allow", + "Action": + [ + "quicksight:DescribeAnalysis", + "quicksight:DescribeDashboard", + "quicksight:DescribeDataSource", + "quicksight:DescribeDataSet", + "quicksight:DescribeDataSetRefreshProperties", + "quicksight:DescribeFolder", + "quicksight:DescribeUser", + "quicksight:ListAnalyses", + "quicksight:ListDashboards", + "quicksight:ListDataSources", + "quicksight:ListDataSets", + "quicksight:ListFolders", + "quicksight:ListUsers", + ], + "Resource": + [ + "*" + ] + } + ] +} +``` + +## Config File + +Create a YAML config file based on the following template. + +### Required Configurations + +You must specify an AWS user credential to access QuickSight API. You can also specify a role ARN and let the connector assume the role before accessing AWS APIs. + +```yaml +aws: + access_key_id: + secret_access_key: + region_name: + assume_role_arn: # If using IAM role +aws_account_id: +``` + +### Optional Configurations + +#### Output Destination + +See [Output Config](../common/docs/output.md) for more information. + +## Testing + +Follow the [Installation](../../README.md) instructions to install `metaphor-connectors` in your environment (or virtualenv). + +Run the following command to test the connector locally: + +```shell +metaphor quick_sight +``` + +Manually verify the output after the run finishes. diff --git a/metaphor/quick_sight/__init__.py b/metaphor/quick_sight/__init__.py new file mode 100644 index 00000000..582695af --- /dev/null +++ b/metaphor/quick_sight/__init__.py @@ -0,0 +1,6 @@ +from metaphor.common.cli import cli_main +from metaphor.quick_sight.extractor import QuickSightExtractor + + +def main(config_file: str): + cli_main(QuickSightExtractor, config_file) diff --git a/metaphor/quick_sight/client.py b/metaphor/quick_sight/client.py new file mode 100644 index 00000000..593b4d77 --- /dev/null +++ b/metaphor/quick_sight/client.py @@ -0,0 +1,111 @@ +import enum +from typing import Dict, List + +import boto3 +from pydantic.dataclasses import dataclass + +from metaphor.common.aws import AwsCredentials +from metaphor.common.logger import json_dump_to_debug_file +from metaphor.quick_sight.models import Dashboard, DataSet, DataSource, ResourceType + + +def create_quick_sight_client(aws: AwsCredentials) -> boto3.client: + return aws.get_session().client("quicksight") + + +class Endpoint(enum.Enum): + list_dashboards = "list_dashboards" + list_data_sets = "list_data_sets" + list_data_sources = "list_data_sources" + + +@dataclass +class EndpointDictKeys: + list_key: str + item_key: str + + +ENDPOINT_SETTING = { + Endpoint.list_data_sets: EndpointDictKeys("DataSetSummaries", "DataSetId"), + Endpoint.list_dashboards: EndpointDictKeys("DashboardSummaryList", "DashboardId"), + Endpoint.list_data_sources: EndpointDictKeys("DataSources", "DataSourceId"), +} + + +class Client: + def __init__( + self, + aws: AwsCredentials, + aws_account_id: str, + resources: Dict[str, ResourceType], + ): + self._client = create_quick_sight_client(aws) + self._aws_account_id = aws_account_id + self._resources = resources + + def get_resources(self): + self._get_dataset_detail() + self._get_dashboard_detail() + self._get_data_source_detail() + + def _get_resource_ids(self, endpoint: Endpoint) -> List[str]: + paginator = self._client.get_paginator(endpoint.value) + paginator_response = paginator.paginate(AwsAccountId=self._aws_account_id) + + ids = [] + settings = ENDPOINT_SETTING[endpoint] + for page in paginator_response: + for item in page[settings.list_key]: + ids.append(item[settings.item_key]) + return ids + + def _get_dataset_detail(self) -> None: + results = [] + for dataset_id in self._get_resource_ids(Endpoint.list_data_sets): + result = self._client.describe_data_set( + AwsAccountId=self._aws_account_id, DataSetId=dataset_id + ) + + results.append(result) + + dataset = DataSet(**(result["DataSet"])) + + if dataset.Arn is None: + continue + + self._resources[dataset.Arn] = dataset + + json_dump_to_debug_file(results, "datasets.json") + + def _get_dashboard_detail(self): + results = [] + for dashboard_id in self._get_resource_ids(Endpoint.list_dashboards): + result = self._client.describe_dashboard( + AwsAccountId=self._aws_account_id, DashboardId=dashboard_id + ) + results.append(result) + dashboard = Dashboard(**(result["Dashboard"])) + + if dashboard.Arn is None: + continue + + self._resources[dashboard.Arn] = dashboard + + json_dump_to_debug_file(results, "dashboards.json") + + def _get_data_source_detail(self): + results = [] + for data_source_id in self._get_resource_ids(Endpoint.list_data_sources): + result = self._client.describe_data_source( + AwsAccountId=self._aws_account_id, DataSourceId=data_source_id + ) + results.append(result) + + data_source = DataSource(**(result["DataSource"])) + + if data_source.Arn is None: + continue + + self._resources[data_source.Arn] = data_source + + json_dump_to_debug_file(results, "data_sources.json") diff --git a/metaphor/quick_sight/config.py b/metaphor/quick_sight/config.py new file mode 100644 index 00000000..0351ed02 --- /dev/null +++ b/metaphor/quick_sight/config.py @@ -0,0 +1,12 @@ +from pydantic.dataclasses import dataclass + +from metaphor.common.aws import AwsCredentials +from metaphor.common.base_config import BaseConfig +from metaphor.common.dataclass import ConnectorConfig + + +@dataclass(config=ConnectorConfig) +class QuickSightRunConfig(BaseConfig): + aws: AwsCredentials + + aws_account_id: str diff --git a/metaphor/quick_sight/data_source_utils.py b/metaphor/quick_sight/data_source_utils.py new file mode 100644 index 00000000..40c31699 --- /dev/null +++ b/metaphor/quick_sight/data_source_utils.py @@ -0,0 +1,105 @@ +from typing import Optional + +from metaphor.common.snowflake import normalize_snowflake_account +from metaphor.models.metadata_change_event import DataPlatform +from metaphor.quick_sight.models import ( + DataSource, + DataSourceType, + TypeDataSourceParameters, +) + +DATA_SOURCE_PLATFORM_MAP = { + DataSourceType.AURORA: DataPlatform.MYSQL, + DataSourceType.AURORA_POSTGRESQL: DataPlatform.POSTGRESQL, + DataSourceType.BIGQUERY: DataPlatform.BIGQUERY, + DataSourceType.DATABRICKS: DataPlatform.UNITY_CATALOG, + DataSourceType.MARIADB: DataPlatform.MYSQL, + DataSourceType.MYSQL: DataPlatform.MYSQL, + DataSourceType.POSTGRESQL: DataPlatform.POSTGRESQL, + DataSourceType.REDSHIFT: DataPlatform.REDSHIFT, + DataSourceType.ORACLE: DataPlatform.ORACLE, + DataSourceType.SNOWFLAKE: DataPlatform.SNOWFLAKE, + DataSourceType.SQLSERVER: DataPlatform.MSSQL, +} + + +def _get_database_from_parameters(parameters: TypeDataSourceParameters): + if parameters.AuroraParameters: + return parameters.AuroraParameters.Database + + if parameters.AuroraPostgreSqlParameters: + return parameters.AuroraPostgreSqlParameters.Database + + if parameters.BigQueryParameters: + return parameters.BigQueryParameters.ProjectId + + if parameters.MariaDbParameters: + return parameters.MariaDbParameters.Database + + if parameters.MySqlParameters: + return parameters.MySqlParameters.Database + + if parameters.OracleParameters: + return parameters.OracleParameters.Database + + if parameters.PostgreSqlParameters: + return parameters.PostgreSqlParameters.Database + + if parameters.RdsParameters: + return parameters.RdsParameters.Database + + if parameters.RedshiftParameters: + return parameters.RedshiftParameters.Database + + if parameters.SnowflakeParameters: + return parameters.SnowflakeParameters.Database + + if parameters.SqlServerParameters: + return parameters.SqlServerParameters.Database + + return None + + +def get_database(data_source: DataSource) -> Optional[str]: + """ + Extract database from DataSource parameters + """ + if data_source.DataSourceParameters is None: + return None + + parameters = data_source.DataSourceParameters + + return _get_database_from_parameters(parameters) + + +def get_account(data_source: DataSource) -> Optional[str]: + """ + Extract account from DataSource parameters + """ + if data_source.DataSourceParameters is None: + return None + + parameters = data_source.DataSourceParameters + + if parameters.AuroraParameters: + return parameters.AuroraParameters.Host + + if parameters.MariaDbParameters: + return parameters.MariaDbParameters.Host + + if parameters.MySqlParameters: + return parameters.MySqlParameters.Host + + if parameters.OracleParameters: + return parameters.OracleParameters.Host + + if parameters.SnowflakeParameters: + return ( + normalize_snowflake_account(parameters.SnowflakeParameters.Host) + if parameters.SnowflakeParameters.Host + else None + ) + + if parameters.SqlServerParameters: + return parameters.SqlServerParameters.Host + return None diff --git a/metaphor/quick_sight/extractor.py b/metaphor/quick_sight/extractor.py new file mode 100644 index 00000000..538ca7ae --- /dev/null +++ b/metaphor/quick_sight/extractor.py @@ -0,0 +1,174 @@ +from typing import Collection, Dict, List + +from metaphor.common.base_extractor import BaseExtractor +from metaphor.common.entity_id import to_entity_id_from_virtual_view_logical_id +from metaphor.common.event_util import ENTITY_TYPES +from metaphor.common.logger import get_logger +from metaphor.common.utils import unique_list +from metaphor.models.crawler_run_metadata import Platform +from metaphor.models.metadata_change_event import AssetStructure, Chart, ChartType +from metaphor.models.metadata_change_event import Dashboard as MetaphorDashboard +from metaphor.models.metadata_change_event import DashboardInfo as MetaphorDashboardInfo +from metaphor.models.metadata_change_event import ( + DashboardLogicalID as MetaphorDashboardLogicalId, +) +from metaphor.models.metadata_change_event import ( + DashboardPlatform as MetaphorDashboardPlatform, +) +from metaphor.models.metadata_change_event import ( + EntityUpstream, + SourceInfo, + VirtualView, + VirtualViewLogicalID, + VirtualViewType, +) +from metaphor.quick_sight.client import Client +from metaphor.quick_sight.config import QuickSightRunConfig +from metaphor.quick_sight.lineage import ( + extract_virtual_view_schema, + extract_virtual_view_upstream, + process_dataset_lineage, +) +from metaphor.quick_sight.models import Dashboard, DataSet, ResourceType + +logger = get_logger() + + +class QuickSightExtractor(BaseExtractor): + """QuickSight metadata extractor""" + + _description = "Quick Sight metadata crawler" + _platform = Platform.QUICK_SIGHT + + @staticmethod + def from_config_file(config_file: str) -> "QuickSightExtractor": + return QuickSightExtractor(QuickSightRunConfig.from_yaml_file(config_file)) + + def __init__(self, config: QuickSightRunConfig) -> None: + super().__init__(config) + self._aws_config = config.aws + self._aws_account_id = config.aws_account_id + + # Arn -> Resource + self._resources: Dict[str, ResourceType] = {} + + # Arn -> VirtualView + self._virtual_views: Dict[str, VirtualView] = {} + + # Arn -> Dashboard + self._dashboards: Dict[str, MetaphorDashboard] = {} + + async def extract(self) -> Collection[ENTITY_TYPES]: + logger.info("Fetching metadata from QuickSight") + + client = Client(self._aws_config, self._aws_account_id, self._resources) + client.get_resources() + + self._extract_virtual_views() + self._extract_dashboards() + + return self._make_entities_list() + + def _extract_virtual_views(self): + for data_set in self._resources.values(): + if not isinstance(data_set, DataSet) or data_set.Arn is None: + continue + + view = self._init_virtual_view(data_set.Arn, data_set) + + columns, source_entities = process_dataset_lineage( + self._resources, data_set + ) + + view.schema = extract_virtual_view_schema(data_set, columns) + view.entity_upstream = extract_virtual_view_upstream( + columns, source_entities + ) + + def _extract_dashboards(self) -> None: + for dashboard in self._resources.values(): + if ( + not isinstance(dashboard, Dashboard) + or dashboard.Arn is None + or dashboard.Version is None + ): + continue + + metaphor_dashboard = self._init_dashboard(dashboard.Arn, dashboard) + metaphor_dashboard.entity_upstream = self._get_dashboard_upstream( + dataset_arns=dashboard.Version.DataSetArns or [] + ) + + def _make_entities_list(self) -> Collection[ENTITY_TYPES]: + entities: List[ENTITY_TYPES] = [] + entities.extend(self._virtual_views.values()) + entities.extend(self._dashboards.values()) + return entities + + def _init_virtual_view(self, arn: str, data_set: DataSet) -> VirtualView: + view = VirtualView( + logical_id=VirtualViewLogicalID( + name=arn, + type=VirtualViewType.QUICK_SIGHT, + ), + structure=AssetStructure(name=data_set.Name), + source_info=SourceInfo( + created_at_source=data_set.CreatedTime, + last_updated=data_set.LastUpdatedTime, + ), + ) + + self._virtual_views[arn] = view + + return view + + def _init_dashboard(self, arn: str, dashboard: Dashboard) -> MetaphorDashboard: + assert dashboard.Version + + metaphor_dashboard = MetaphorDashboard( + logical_id=MetaphorDashboardLogicalId( + dashboard_id=arn, + platform=MetaphorDashboardPlatform.QUICK_SIGHT, + ), + source_info=SourceInfo( + created_at_source=dashboard.CreatedTime, + last_updated=dashboard.LastUpdatedTime, + ), + structure=AssetStructure( + name=dashboard.Name, + ), + ) + + sheets = dashboard.Version.Sheets or [] + + metaphor_dashboard.dashboard_info = MetaphorDashboardInfo( + description=dashboard.Version.Description, + title=dashboard.Name, + charts=[ + Chart( + chart_type=ChartType.OTHER, + title=sheet.Name, + url=None, + ) + for sheet in sheets + ], + ) + + self._dashboards[arn] = metaphor_dashboard + + return metaphor_dashboard + + def _get_dashboard_upstream(self, dataset_arns: List[str]) -> EntityUpstream: + source_entities: List[str] = [] + + for arn in dataset_arns: + virtual_view = self._virtual_views.get(arn) + if not virtual_view: + continue + source_entities.append( + str(to_entity_id_from_virtual_view_logical_id(virtual_view.logical_id)) + ) + + return EntityUpstream( + source_entities=(unique_list(source_entities) if source_entities else None) + ) diff --git a/metaphor/quick_sight/lineage.py b/metaphor/quick_sight/lineage.py new file mode 100644 index 00000000..a560fbf4 --- /dev/null +++ b/metaphor/quick_sight/lineage.py @@ -0,0 +1,330 @@ +from typing import Dict, List, Optional, Set, Tuple + +from pydantic.dataclasses import Field, dataclass + +from metaphor.common.entity_id import ( + parts_to_dataset_entity_id, + to_entity_id_from_virtual_view_logical_id, +) +from metaphor.common.sql.table_level_lineage.table_level_lineage import ( + extract_table_level_lineage, +) +from metaphor.models.metadata_change_event import ( + EntityUpstream, + FieldMapping, + SourceField, + VirtualViewLogicalID, + VirtualViewSchema, + VirtualViewSchemaField, + VirtualViewType, +) +from metaphor.quick_sight.data_source_utils import ( + DATA_SOURCE_PLATFORM_MAP, + get_account, + get_database, +) +from metaphor.quick_sight.models import ( + DataSet, + DataSetLogicalTable, + DataSetPhysicalTable, + DataSource, + ResourceType, + TransformOperation, + TypeCustomSql, + TypeRelationalTable, +) + + +@dataclass +class ColumnReference: + upstream_id: Optional[str] + name: str + + +@dataclass +class Column: + upstream: List[ColumnReference] = Field(default_factory=list) + expression: Optional[str] = None + + +TypeColumnMap = Dict[str, Column] + + +def _get_source_from_relation_table( + resources: Dict[str, ResourceType], + relation_table: TypeRelationalTable, + source_entities: Set[str], +) -> Optional[str]: + data_source = resources.get(relation_table.DataSourceArn) + + if ( + data_source is None + or not isinstance(data_source, DataSource) + or data_source.Type is None + ): + return None + + data_platform = DATA_SOURCE_PLATFORM_MAP.get(data_source.Type) + if not data_platform: + return None + + database = get_database(data_source) + account = get_account(data_source) + + source_entity_id = str( + parts_to_dataset_entity_id( + platform=data_platform, + account=account, + database=relation_table.Catalog or database, + schema=relation_table.Schema, + table=relation_table.Name, + ) + ) + + source_entities.add(source_entity_id) + return source_entity_id + + +def _get_source_from_custom_sql( + resources: Dict[str, ResourceType], + custom_sql: TypeCustomSql, + source_entities: Set[str], +) -> None: + data_source = resources.get(custom_sql.DataSourceArn) + + if ( + data_source is None + or not isinstance(data_source, DataSource) + or data_source.Type is None + ): + return None + + data_platform = DATA_SOURCE_PLATFORM_MAP.get(data_source.Type) + if not data_platform: + return None + + database = get_database(data_source) + account = get_account(data_source) + query = custom_sql.SqlQuery + + tll = extract_table_level_lineage( + query, + platform=data_platform, + account=account, + query_id="", + default_database=database, + ) + + for source in tll.sources: + source_entities.add(source.id) + + return None + + +def _process_physical_table_map( + resources: Dict[str, ResourceType], + tables: Dict[str, TypeColumnMap], + source_entities: Set[str], + physical_table_map: Dict[str, DataSetPhysicalTable], +) -> None: + for table_id, physical_table in physical_table_map.items(): + columns: TypeColumnMap = {} + + if physical_table.CustomSql: + # Table lineage + _get_source_from_custom_sql( + resources, physical_table.CustomSql, source_entities + ) + + # CLL of custom sql is not supported + for column in physical_table.CustomSql.Columns: + if column.Name is None: + continue + columns[column.Name] = Column() + + elif physical_table.RelationalTable: + source_dataset_id = _get_source_from_relation_table( + resources, physical_table.RelationalTable, source_entities + ) + + for column in physical_table.RelationalTable.InputColumns: + if column.Name is None: + continue + columns[column.Name] = Column( + upstream=[ + ColumnReference(upstream_id=source_dataset_id, name=column.Name) + ] + ) + + elif physical_table.S3Source: + for column in physical_table.S3Source.InputColumns: + if column.Name is None: + continue + columns[column.Name] = Column() + + tables[table_id] = columns + return None + + +def _process_transformation( + columns: TypeColumnMap, + transformations: List[TransformOperation], +) -> None: + for transformation in transformations: + if transformation.CreateColumnsOperation: + for column in transformation.CreateColumnsOperation.Columns: + columns[column.ColumnName] = Column( + upstream=[], expression=column.Expression + ) + elif transformation.ProjectOperation: + for key in list(columns.keys()): + if key not in transformation.ProjectOperation.ProjectedColumns: + columns.pop(key) + + elif transformation.RenameColumnOperation: + before = transformation.RenameColumnOperation.ColumnName + after = transformation.RenameColumnOperation.NewColumnName + columns[after] = columns[before] + columns.pop(before) + + +def _process_logical_table_map( + resources: Dict[str, ResourceType], + tables: Dict[str, Dict[str, Column]], + source_entities: Set[str], + logical_table_map: Dict[str, DataSetLogicalTable], +) -> Dict[str, Column]: + logical_tables = list(logical_table_map.items()) + columns: Dict[str, Column] + + # Walk through the dependence tree, the last table (root) will be the output table + while logical_tables: + unresolved = [] + columns = {} + + for table_id, logical_table in logical_tables: + source = logical_table.Source + if source.DataSetArn: + arn = source.DataSetArn + upstream_data_set = resources.get(arn) + if upstream_data_set is None: + continue + assert isinstance(upstream_data_set, DataSet) + + upstream_id = str( + to_entity_id_from_virtual_view_logical_id( + VirtualViewLogicalID(name=arn, type=VirtualViewType.QUICK_SIGHT) + ) + ) + source_entities.add(upstream_id) + for column in upstream_data_set.OutputColumns or []: + if column.Name is None: + continue + columns[column.Name] = Column( + upstream=[ + ColumnReference(upstream_id=upstream_id, name=column.Name) + ] + ) + + elif source.PhysicalTableId: + upstream_table = tables.get(source.PhysicalTableId) + if upstream_table: + columns.update(**upstream_table) + else: + assert False, "should not happen" + + elif source.JoinInstruction: + left_table_id = source.JoinInstruction.LeftOperand + right_table_id = source.JoinInstruction.RightOperand + + left_table = tables.get(left_table_id) + right_table = tables.get(right_table_id) + if left_table and right_table: + columns.update(**left_table) + columns.update(**right_table) + + if not columns: + unresolved.append((table_id, logical_table)) + continue + + _process_transformation(columns, logical_table.DataTransforms or []) + + tables[table_id] = columns + + logical_tables = unresolved + + # Return root + return columns + + +def process_dataset_lineage( + resources: Dict[str, ResourceType], data_set: DataSet +) -> Tuple[TypeColumnMap, Set[str]]: + tables: Dict[str, Dict[str, Column]] = {} + source_entities: Set[str] = set() + + if ( + not data_set.LogicalTableMap + or not data_set.PhysicalTableMap + or not data_set.OutputColumns + ): + return {}, source_entities + + _process_physical_table_map( + resources, tables, source_entities, data_set.PhysicalTableMap + ) + + columns = _process_logical_table_map( + resources, tables, source_entities, data_set.LogicalTableMap + ) + + return columns, source_entities + + +def extract_virtual_view_schema( + data_set: DataSet, columns: TypeColumnMap +) -> Optional[VirtualViewSchema]: + output_columns = data_set.OutputColumns or [] + + if not output_columns: + return None + + fields: List[VirtualViewSchemaField] = [] + for column in output_columns: + if column.Name is None: + continue + reference = columns.get(column.Name) + fields.append( + VirtualViewSchemaField( + field_name=column.Name, + field_path=column.Name.lower(), + description=column.Description, + type=column.Type, + formula=reference.expression if reference else None, + optional_type=( + "FORMULA" if reference and reference.expression else None + ), + ) + ) + return VirtualViewSchema(fields=fields) if fields else None + + +def extract_virtual_view_upstream( + columns: Dict[str, Column], source_entities: Set[str] +) -> EntityUpstream: + field_mappings: List[FieldMapping] = [] + for column_name, upstream_column in columns.items(): + field_mappings.append( + FieldMapping( + destination=column_name.lower(), + sources=[ + SourceField(source_entity_id=x.upstream_id, field=x.name.lower()) + for x in upstream_column.upstream + ], + ) + ) + + return EntityUpstream( + source_entities=sorted(list(source_entities)) if source_entities else None, + field_mappings=field_mappings if field_mappings else None, + ) diff --git a/metaphor/quick_sight/models.py b/metaphor/quick_sight/models.py new file mode 100644 index 00000000..532aeca6 --- /dev/null +++ b/metaphor/quick_sight/models.py @@ -0,0 +1,376 @@ +from datetime import datetime +from enum import Enum +from typing import Any, Dict, List, Optional, Union + +from pydantic.dataclasses import dataclass + + +class TypeImportMode(Enum): + DIRECT_QUERY = "DIRECT_QUERY" + SPICE = "SPICE" + + +@dataclass +class DataSetColumn: + Description: Optional[str] = None + Name: Optional[str] = None + Type: Optional[str] = None + + +@dataclass +class TypeProjectOperation: + ProjectedColumns: List[str] + + +@dataclass +class CalculatedColumn: + ColumnName: str + ColumnId: str + Expression: str + + +@dataclass +class TypeCreateColumnsOperation: + Columns: List[CalculatedColumn] + + +@dataclass +class TypeRenameColumnOperation: + ColumnName: str + NewColumnName: str + + +@dataclass +class TransformOperation: + CreateColumnsOperation: Optional[TypeCreateColumnsOperation] = None + ProjectOperation: Optional[TypeProjectOperation] = None + RenameColumnOperation: Optional[TypeRenameColumnOperation] = None + + +@dataclass +class TypeJoinInstruction: + LeftOperand: str + OnClause: str + RightOperand: str + Type: str + + +@dataclass +class DataSetSource: + DataSetArn: Optional[str] = None + JoinInstruction: Optional[TypeJoinInstruction] = None + PhysicalTableId: Optional[str] = None + + +@dataclass +class DataSetLogicalTable: + Alias: str + Source: DataSetSource + DataTransforms: Optional[List[TransformOperation]] = None + + +@dataclass +class TypeRelationalTable: + DataSourceArn: str + InputColumns: List[DataSetColumn] + Name: str + Catalog: Optional[str] = None + Schema: Optional[str] = None + + +@dataclass +class TypeCustomSql: + DataSourceArn: str + Name: str + SqlQuery: str + Columns: List[DataSetColumn] + + +@dataclass +class TypeS3Source: + DataSourceArn: str + InputColumns: List[DataSetColumn] + + +@dataclass +class DataSetPhysicalTable: + CustomSql: Optional[TypeCustomSql] = None + RelationalTable: Optional[TypeRelationalTable] = None + S3Source: Optional[TypeS3Source] = None + + +@dataclass +class DataSet: + """ + @see: https://docs.aws.amazon.com/quicksight/latest/APIReference/API_DataSet.html + """ + + Arn: Optional[str] = None + CreatedTime: Optional[datetime] = None + DataSetId: Optional[str] = None + ImportMode: Optional[TypeImportMode] = None + LastUpdatedTime: Optional[datetime] = None + LogicalTableMap: Optional[Dict[str, DataSetLogicalTable]] = None + Name: Optional[str] = None + OutputColumns: Optional[List[DataSetColumn]] = None + PhysicalTableMap: Optional[Dict[str, DataSetPhysicalTable]] = None + + +@dataclass +class Sheet: + Name: Optional[str] = None + SheetId: Optional[str] = None + + +@dataclass +class VersionedDashboard: + Arn: Optional[str] = None + CreatedTime: Optional[datetime] = None + DataSetArns: Optional[List[str]] = None + Description: Optional[str] = None + Sheets: Optional[List[Sheet]] = None + SourceEntityArn: Optional[str] = None + VersionNumber: Optional[int] = None + + +@dataclass +class Dashboard: + """ + @see: https://docs.aws.amazon.com/quicksight/latest/APIReference/API_Dashboard.html + """ + + Arn: Optional[str] = None + CreatedTime: Optional[datetime] = None + DashboardId: Optional[str] = None + LastPublishedTime: Optional[datetime] = None + LastUpdatedTime: Optional[datetime] = None + Name: Optional[str] = None + Version: Optional[VersionedDashboard] = None + + +@dataclass +class Visual: + BarChartVisual: Optional[Any] = None + BoxPlotVisual: Optional[Any] = None + ComboChartVisual: Optional[Any] = None + CustomContentVisual: Optional[Any] = None + EmptyVisual: Optional[Any] = None + FilledMapVisual: Optional[Any] = None + FunnelChartVisual: Optional[Any] = None + GaugeChartVisual: Optional[Any] = None + GeospatialMapVisual: Optional[Any] = None + HeatMapVisual: Optional[Any] = None + HistogramVisual: Optional[Any] = None + InsightVisual: Optional[Any] = None + KPIVisual: Optional[Any] = None + LineChartVisual: Optional[Any] = None + PieChartVisual: Optional[Any] = None + PivotTableVisual: Optional[Any] = None + RadarChartVisual: Optional[Any] = None + SankeyDiagramVisual: Optional[Any] = None + ScatterPlotVisual: Optional[Any] = None + TreeMapVisual: Optional[Any] = None + WaterfallVisual: Optional[Any] = None + WordCloudVisual: Optional[Any] = None + + +@dataclass +class SheetDefinition: + SheetId: str + ContentType: Optional[str] = None + Description: Optional[str] = None + Name: Optional[str] = None + Title: Optional[str] = None + Visuals: Optional[List[Visual]] = None + + +@dataclass +class DataSetIdentifierDeclaration: + DataSetArn: str + Identifier: str + + +@dataclass +class AnalysisDefinition: + """ + @see: https://docs.aws.amazon.com/quicksight/latest/APIReference/API_AnalysisDefinition.html + """ + + DataSetIdentifierDeclarations: List[DataSetIdentifierDeclaration] + Sheets: Optional[List[SheetDefinition]] = None + + +@dataclass +class Analysis: + AnalysisId: str + Arn: str + Name: str + Definition: AnalysisDefinition + + +class DataSourceType(Enum): + """ + @see: https://docs.aws.amazon.com/quicksight/latest/APIReference/API_DataSource.html + """ + + ADOBE_ANALYTICS = "ADOBE_ANALYTICS" + AMAZON_ELASTICSEARCH = "AMAZON_ELASTICSEARCH" + ATHENA = "ATHENA" + AURORA = "AURORA" + AURORA_POSTGRESQL = "AURORA_POSTGRESQL" + AWS_IOT_ANALYTICS = "AWS_IOT_ANALYTICS" + GITHUB = "GITHUB" + JIRA = "JIRA" + MARIADB = "MARIADB" + MYSQL = "MYSQL" + ORACLE = "ORACLE" + POSTGRESQL = "POSTGRESQL" + PRESTO = "PRESTO" + REDSHIFT = "REDSHIFT" + S3 = "S3" + SALESFORCE = "SALESFORCE" + SERVICENOW = "SERVICENOW" + SNOWFLAKE = "SNOWFLAKE" + SPARK = "SPARK" + SQLSERVER = "SQLSERVER" + TERADATA = "TERADATA" + TWITTER = "TWITTER" + TIMESTREAM = "TIMESTREAM" + AMAZON_OPENSEARCH = "AMAZON_OPENSEARCH" + EXASOL = "EXASOL" + DATABRICKS = "DATABRICKS" + STARBURST = "STARBURST" + TRINO = "TRINO" + BIGQUERY = "BIGQUERY" + + +@dataclass +class DatabaseParameters: + Database: str + Host: str + Port: int + + +@dataclass +class TypeAuroraParameters(DatabaseParameters): + pass + + +@dataclass +class TypeAuroraPostgreSqlParameters(DatabaseParameters): + pass + + +@dataclass +class TypeBigQueryParameters: + ProjectId: str + + +@dataclass +class TypeDatabricksParameters: + Host: str + Port: int + SqlEndpointPath: str + + +@dataclass +class TypeMariaDbParameters(DatabaseParameters): + pass + + +@dataclass +class TypeMySqlParameters(DatabaseParameters): + pass + + +@dataclass +class TypeOracleParameters(DatabaseParameters): + pass + + +@dataclass +class TypePostgreSqlParameters(DatabaseParameters): + pass + + +@dataclass +class TypeRdsParameters: + Database: str + InstanceId: str + + +@dataclass +class TypeRedshiftParameters: + Database: str + ClusterId: Optional[str] = None + Host: Optional[str] = None + Port: Optional[int] = None + + +@dataclass +class TypeSnowflakeParameters: + Database: str + Host: str + + +@dataclass +class TypeSqlServerParameters(DatabaseParameters): + pass + + +@dataclass +class TypeDataSourceParameters: + AuroraParameters: Optional[TypeAuroraParameters] = None + AuroraPostgreSqlParameters: Optional[TypeAuroraPostgreSqlParameters] = None + BigQueryParameters: Optional[TypeBigQueryParameters] = None + DatabricksParameters: Optional[TypeDatabricksParameters] = None + MariaDbParameters: Optional[TypeMariaDbParameters] = None + MySqlParameters: Optional[TypeMySqlParameters] = None + OracleParameters: Optional[TypeOracleParameters] = None + PostgreSqlParameters: Optional[TypePostgreSqlParameters] = None + RdsParameters: Optional[TypeRdsParameters] = None + RedshiftParameters: Optional[TypeRedshiftParameters] = None + SnowflakeParameters: Optional[TypeSnowflakeParameters] = None + SqlServerParameters: Optional[TypeSqlServerParameters] = None + + +@dataclass +class DataSource: + Arn: Optional[str] = None + CreatedTime: Optional[datetime] = None + DataSourceId: Optional[str] = None + Name: Optional[str] = None + Type: Optional[DataSourceType] = None + DataSourceParameters: Optional[TypeDataSourceParameters] = None + + +@dataclass +class Folder: + """ + @see: https://docs.aws.amazon.com/quicksight/latest/APIReference/API_Folder.html + """ + + Arn: Optional[str] = None + CreatedTime: Optional[datetime] = None + FolderId: Optional[str] = None + FolderPath: Optional[List[str]] = None + FolderType: Optional[str] = None + LastUpdatedTime: Optional[datetime] = None + Name: Optional[str] = None + SharingModel: Optional[str] = None + + +@dataclass +class User: + """ + @see: https://docs.aws.amazon.com/quicksight/latest/APIReference/API_User.html + """ + + Arn: Optional[str] = None + Email: Optional[str] = None + PrincipalId: Optional[str] = None + Role: Optional[str] = None + UserName: Optional[str] = None + + +ResourceType = Union[Analysis, DataSet, Dashboard, DataSource, Folder, User] diff --git a/poetry.lock b/poetry.lock index 477545c4..6caa3d92 100644 --- a/poetry.lock +++ b/poetry.lock @@ -645,17 +645,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.35.2" +version = "1.35.19" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.35.2-py3-none-any.whl", hash = "sha256:c2f0837a259002489e59d1c30008791e3b3bb59e30e48c64e1d2d270147a4549"}, - {file = "boto3-1.35.2.tar.gz", hash = "sha256:cbf197ce28f04bc1ffa1db0aa26a1903d9bfa57a490f70537932e84367cdd15b"}, + {file = "boto3-1.35.19-py3-none-any.whl", hash = "sha256:84b3fe1727945bc3cada832d969ddb3dc0d08fce1677064ca8bdc13a89c1a143"}, + {file = "boto3-1.35.19.tar.gz", hash = "sha256:9979fe674780a0b7100eae9156d74ee374cd1638a9f61c77277e3ce712f3e496"}, ] [package.dependencies] -botocore = ">=1.35.2,<1.36.0" +botocore = ">=1.35.19,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -664,13 +664,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.2" +version = "1.35.19" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.2-py3-none-any.whl", hash = "sha256:92b168d8be79055bb25754aa34d699866d8aa66abc69f8ce99b0c191bd9c6e70"}, - {file = "botocore-1.35.2.tar.gz", hash = "sha256:96c8eb6f0baed623a1b57ca9f24cb21d5508872cf0dfebb55527a85b6dbc76ba"}, + {file = "botocore-1.35.19-py3-none-any.whl", hash = "sha256:c83f7f0cacfe7c19b109b363ebfa8736e570d24922f16ed371681f58ebab44a9"}, + {file = "botocore-1.35.19.tar.gz", hash = "sha256:42d6d8db7250cbd7899f786f9861e02cab17dc238f64d6acb976098ed9809625"}, ] [package.dependencies] @@ -682,7 +682,7 @@ urllib3 = [ ] [package.extras] -crt = ["awscrt (==0.21.2)"] +crt = ["awscrt (==0.21.5)"] [[package]] name = "cachetools" @@ -3258,13 +3258,13 @@ files = [ [[package]] name = "metaphor-models" -version = "0.38.2" +version = "0.38.3" description = "" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "metaphor_models-0.38.2-py3-none-any.whl", hash = "sha256:fe17ff8c088ecd8a79d2abc4be2a48ed305fa31534ae54e5fc348f59a9d74f75"}, - {file = "metaphor_models-0.38.2.tar.gz", hash = "sha256:676206f98f2c01a28bc4990fad97b685c6d4afb69390ad061ec6ea2ad8a687b3"}, + {file = "metaphor_models-0.38.3-py3-none-any.whl", hash = "sha256:e33810d5445aa4080d8ec204818272a7aea2d1deb44a7866335132cb35bcd0a3"}, + {file = "metaphor_models-0.38.3.tar.gz", hash = "sha256:79e8710e08e9448ee2607526fb9c54b1d75008509f79c0c141af487c15f7fea2"}, ] [[package]] @@ -6666,4 +6666,4 @@ unity-catalog = ["databricks-sdk", "databricks-sql-connector", "sqlglot"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.12" -content-hash = "18ea1fcc239312abb826eee454b33fe1ca0c3b847d383afb533062d517bbe6bf" +content-hash = "0f6b045d0cc3d5203c7d5488cfdb16bdfcc3ddcf9bf103ff2f20099417b1dfda" diff --git a/pyproject.toml b/pyproject.toml index 99b46723..f582b347 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.106" +version = "0.14.107" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] @@ -21,8 +21,8 @@ aws-assume-role-lib = "^2.10.0" azure-identity = { version = "^1.14.0", optional = true } azure-mgmt-datafactory = { version = "^8.0.0", optional = true } beautifulsoup4 = { version = "^4.12.3", optional = true } -boto3 = "^1.34.64" -botocore = "^1.34.64" +boto3 = "^1.35.19" +botocore = "^1.35.19" canonicaljson = "^2.0.0" confluent-kafka = { version = "^2.3.0", optional = true } databricks-sdk = { version = "^0.29.0", optional = true } @@ -42,7 +42,7 @@ llama-index-readers-confluence = { version = "^0.1.4", optional = true } llama-index-readers-notion = { version = "^0.1.6", optional = true } looker-sdk = { version = "^24.2.0", optional = true } lxml = { version = "~=5.0.0", optional = true } -metaphor-models = "0.38.2" +metaphor-models = "0.38.3" more-itertools = { version = "^10.1.0", optional = true } msal = { version = "^1.28.0", optional = true } msgraph-beta-sdk = { version = "~1.4.0", optional = true } diff --git a/tests/quick_sight/data/dashboards.json b/tests/quick_sight/data/dashboards.json new file mode 100644 index 00000000..6b70c08d --- /dev/null +++ b/tests/quick_sight/data/dashboards.json @@ -0,0 +1,131 @@ +[ + { + "ResponseMetadata": { + "RequestId": "4a45406f-cd36-4d6b-8b16-fd6e0403e2f3", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:08 GMT", + "content-type": "application/json", + "content-length": "1420", + "connection": "keep-alive", + "x-amzn-requestid": "4a45406f-cd36-4d6b-8b16-fd6e0403e2f3" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "Dashboard": { + "DashboardId": "1c9b51b1-3204-4ee3-9491-e92ddded8518", + "Arn": "arn:aws:quicksight:us-west-2:123456789012:dashboard/1c9b51b1-3204-4ee3-9491-e92ddded8518", + "Name": "Sales", + "Version": { + "CreatedTime": "2024-09-12 21:01:13.244000+08:00", + "Errors": [], + "VersionNumber": 1, + "Status": "CREATION_SUCCESSFUL", + "SourceEntityArn": "arn:aws:quicksight:us-west-2:123456789012:analysis/0176e428-e196-4cd8-b651-30971b6e6aa4", + "DataSetArns": [ + "arn:aws:quicksight:us-west-2:123456789012:dataset/7bcddd7f-ed98-4e91-8064-9ae885f0376a" + ], + "Sheets": [ + { + "SheetId": "1c9b51b1-3204-4ee3-9491-e92ddded8518_2ec249d2-c888-47c0-a7bb-f46e4e7d0e26", + "Name": "Revenue and Cost by Country" + } + ] + }, + "CreatedTime": "2024-09-12 21:01:13.249000+08:00", + "LastPublishedTime": "2024-09-12 21:01:13.249000+08:00", + "LastUpdatedTime": "2024-09-12 21:01:13.244000+08:00", + "LinkEntities": [ + "arn:aws:quicksight:us-west-2:123456789012:analysis/0176e428-e196-4cd8-b651-30971b6e6aa4" + ] + }, + "RequestId": "4a45406f-cd36-4d6b-8b16-fd6e0403e2f3" + }, + { + "ResponseMetadata": { + "RequestId": "0dc50b4e-5138-4d8f-8670-ebfae0f4e6bc", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:08 GMT", + "content-type": "application/json", + "content-length": "1426", + "connection": "keep-alive", + "x-amzn-requestid": "0dc50b4e-5138-4d8f-8670-ebfae0f4e6bc" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "Dashboard": { + "DashboardId": "eb39c0d9-d071-43e6-b75a-cc303752b702", + "Arn": "arn:aws:quicksight:us-west-2:123456789012:dashboard/eb39c0d9-d071-43e6-b75a-cc303752b702", + "Name": "Bike rides", + "Version": { + "CreatedTime": "2024-09-18 16:24:00.923000+08:00", + "Errors": [], + "VersionNumber": 1, + "Status": "CREATION_SUCCESSFUL", + "SourceEntityArn": "arn:aws:quicksight:us-west-2:123456789012:analysis/b00a97f9-f822-49e5-af92-3f42558af443", + "DataSetArns": [ + "arn:aws:quicksight:us-west-2:123456789012:dataset/7c6a5c47-fbc7-4307-afd3-57f79864593e" + ], + "Sheets": [ + { + "SheetId": "eb39c0d9-d071-43e6-b75a-cc303752b702_44c0a3d2-0d27-45a9-8594-4c192bf77f52", + "Name": "Relation between minutes and docks" + } + ] + }, + "CreatedTime": "2024-09-18 16:24:00.929000+08:00", + "LastPublishedTime": "2024-09-18 16:24:00.929000+08:00", + "LastUpdatedTime": "2024-09-18 16:24:00.923000+08:00", + "LinkEntities": [ + "arn:aws:quicksight:us-west-2:123456789012:analysis/b00a97f9-f822-49e5-af92-3f42558af443" + ] + }, + "RequestId": "0dc50b4e-5138-4d8f-8670-ebfae0f4e6bc" + }, + { + "ResponseMetadata": { + "RequestId": "823923d6-3cb3-4c8f-b636-381b01ad7862", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:09 GMT", + "content-type": "application/json", + "content-length": "1399", + "connection": "keep-alive", + "x-amzn-requestid": "823923d6-3cb3-4c8f-b636-381b01ad7862" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "Dashboard": { + "DashboardId": "fea7f6ce-ac23-44f4-bf9b-fb03b535403e", + "Arn": "arn:aws:quicksight:us-west-2:123456789012:dashboard/fea7f6ce-ac23-44f4-bf9b-fb03b535403e", + "Name": "Jaffle Shop Orders", + "Version": { + "CreatedTime": "2024-09-18 16:09:35.228000+08:00", + "Errors": [], + "VersionNumber": 1, + "Status": "CREATION_SUCCESSFUL", + "SourceEntityArn": "arn:aws:quicksight:us-west-2:123456789012:analysis/b1af962f-7251-426a-ac36-286156e3ce31", + "DataSetArns": [ + "arn:aws:quicksight:us-west-2:123456789012:dataset/fb1b23e7-ff1f-47b7-a04e-33b30847e9a7" + ], + "Sheets": [ + { + "SheetId": "fea7f6ce-ac23-44f4-bf9b-fb03b535403e_74f6d342-3f1b-4924-a609-178b72bf06b1", + "Name": "Sheet 1" + } + ] + }, + "CreatedTime": "2024-09-18 16:09:35.235000+08:00", + "LastPublishedTime": "2024-09-18 16:09:35.235000+08:00", + "LastUpdatedTime": "2024-09-18 16:09:35.228000+08:00", + "LinkEntities": [ + "arn:aws:quicksight:us-west-2:123456789012:analysis/b1af962f-7251-426a-ac36-286156e3ce31" + ] + }, + "RequestId": "823923d6-3cb3-4c8f-b636-381b01ad7862" + } +] diff --git a/tests/quick_sight/data/data_sources.json b/tests/quick_sight/data/data_sources.json new file mode 100644 index 00000000..76cc772c --- /dev/null +++ b/tests/quick_sight/data/data_sources.json @@ -0,0 +1,110 @@ +[ + { + "ResponseMetadata": { + "RequestId": "d10d8d0b-e621-4213-b9e5-06f107834a5d", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:13 GMT", + "content-type": "application/json", + "content-length": "1951", + "connection": "keep-alive", + "x-amzn-requestid": "d10d8d0b-e621-4213-b9e5-06f107834a5d" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "DataSource": { + "Arn": "arn:aws:quicksight:us-west-2:123456789012:datasource/01d81128-094e-4f2c-93c5-6d9f2c049ecd", + "DataSourceId": "01d81128-094e-4f2c-93c5-6d9f2c049ecd", + "Name": "Snowflake", + "Type": "SNOWFLAKE", + "Status": "CREATION_SUCCESSFUL", + "CreatedTime": "2024-09-18 16:13:10.685000+08:00", + "LastUpdatedTime": "2024-09-18 16:13:10.685000+08:00", + "DataSourceParameters": { + "SnowflakeParameters": { + "Host": "account.snowflakecomputing.com", + "Database": "ACME", + "Warehouse": "COMPUTE_WH" + } + }, + "SslProperties": { + "DisableSsl": false + } + }, + "RequestId": "d10d8d0b-e621-4213-b9e5-06f107834a5d" + }, + { + "ResponseMetadata": { + "RequestId": "6423da10-ad54-46a4-aca4-edbcc08b67f7", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:14 GMT", + "content-type": "application/json", + "content-length": "1603", + "connection": "keep-alive", + "x-amzn-requestid": "6423da10-ad54-46a4-aca4-edbcc08b67f7" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "DataSource": { + "Arn": "arn:aws:quicksight:us-west-2:123456789012:datasource/7e079648-2501-423a-be75-9ee19f351aad", + "DataSourceId": "7e079648-2501-423a-be75-9ee19f351aad", + "Name": "Postgres (Public network)", + "Type": "POSTGRESQL", + "Status": "CREATION_SUCCESSFUL", + "CreatedTime": "2024-09-18 15:58:45.404000+08:00", + "LastUpdatedTime": "2024-09-18 15:58:45.404000+08:00", + "DataSourceParameters": { + "PostgreSqlParameters": { + "Host": "127.0.0.1", + "Port": 5432, + "Database": "metaphor" + } + }, + "SslProperties": { + "DisableSsl": false + } + }, + "RequestId": "6423da10-ad54-46a4-aca4-edbcc08b67f7" + }, + { + "ResponseMetadata": { + "RequestId": "d7b2636b-a9db-4655-9f35-19d407ffd9b8", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:14 GMT", + "content-type": "application/json", + "content-length": "1766", + "connection": "keep-alive", + "x-amzn-requestid": "d7b2636b-a9db-4655-9f35-19d407ffd9b8" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "DataSource": { + "Arn": "arn:aws:quicksight:us-west-2:123456789012:datasource/d2603795-6ef1-403a-84f7-898fabf9458c", + "DataSourceId": "d2603795-6ef1-403a-84f7-898fabf9458c", + "Name": "Redshift", + "Type": "REDSHIFT", + "Status": "CREATION_SUCCESSFUL", + "CreatedTime": "2024-09-12 20:58:31.910000+08:00", + "LastUpdatedTime": "2024-09-12 20:58:31.910000+08:00", + "DataSourceParameters": { + "RedshiftParameters": { + "Host": "127.0.0.1", + "Port": 5439, + "Database": "metaphor" + } + }, + "VpcConnectionProperties": { + "VpcConnectionArn": "arn:aws:quicksight:us-west-2:123456789012:vpcConnection/85b6925d-c221-43ad-af9a-75cf0d5789c4" + }, + "SslProperties": { + "DisableSsl": false + } + }, + "RequestId": "d7b2636b-a9db-4655-9f35-19d407ffd9b8" + } +] diff --git a/tests/quick_sight/data/datasets.json b/tests/quick_sight/data/datasets.json new file mode 100644 index 00000000..f516bad4 --- /dev/null +++ b/tests/quick_sight/data/datasets.json @@ -0,0 +1,718 @@ +[ + { + "ResponseMetadata": { + "RequestId": "084a0be5-30bb-4a0e-9175-756a44ea021c", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:06 GMT", + "content-type": "application/json", + "content-length": "4477", + "connection": "keep-alive", + "x-amzn-requestid": "084a0be5-30bb-4a0e-9175-756a44ea021c" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "DataSet": { + "Arn": "arn:aws:quicksight:us-west-2:123456789012:dataset/6f516e19-84f8-4d17-9bd9-feecf1bdc346", + "DataSetId": "6f516e19-84f8-4d17-9bd9-feecf1bdc346", + "Name": "locations", + "CreatedTime": "2024-09-19 16:59:00.434000+08:00", + "LastUpdatedTime": "2024-09-19 17:30:15.605000+08:00", + "PhysicalTableMap": { + "f3b260dc-4638-4620-91fe-36f006936052": { + "RelationalTable": { + "DataSourceArn": "arn:aws:quicksight:us-west-2:123456789012:datasource/7e079648-2501-423a-be75-9ee19f351aad", + "Schema": "jaffle_shop", + "Name": "locations", + "InputColumns": [ + { + "Name": "location_id", + "Type": "STRING" + }, + { + "Name": "location_name", + "Type": "STRING" + }, + { + "Name": "tax_rate", + "Type": "DECIMAL", + "SubType": "FLOAT" + }, + { + "Name": "opened_date", + "Type": "DATETIME" + } + ] + } + } + }, + "LogicalTableMap": { + "2c824bdb-87ae-43ff-bb28-b31b089be133": { + "Alias": "orders", + "DataTransforms": [ + { + "RenameColumnOperation": { + "ColumnName": "location_id", + "NewColumnName": "location_id[orders]" + } + } + ], + "Source": { + "DataSetArn": "arn:aws:quicksight:us-west-2:123456789012:dataset/fb1b23e7-ff1f-47b7-a04e-33b30847e9a7" + } + }, + "b46b6946-3391-41cf-8f1f-57aa6e5e3d4e": { + "Alias": "Intermediate Table", + "DataTransforms": [ + { + "CreateColumnsOperation": { + "Columns": [ + { + "ColumnName": "revenue", + "ColumnId": "a63c9ad8-6806-4ac0-ab2d-e77cbdbd178e", + "Expression": "{order_total} - {order_cost}" + } + ] + } + }, + { + "ProjectOperation": { + "ProjectedColumns": [ + "location_name", + "opened_date", + "order_total", + "tax_paid", + "ordered_at", + "order_cost", + "customer_name", + "customer_type", + "revenue" + ] + } + } + ], + "Source": { + "JoinInstruction": { + "LeftOperand": "ff3db8ef-966c-4631-b33f-867fdbe0c008", + "RightOperand": "2c824bdb-87ae-43ff-bb28-b31b089be133", + "Type": "RIGHT", + "OnClause": "{location_id} = {location_id[orders]}" + } + } + }, + "ff3db8ef-966c-4631-b33f-867fdbe0c008": { + "Alias": "locations", + "Source": { + "PhysicalTableId": "f3b260dc-4638-4620-91fe-36f006936052" + } + } + }, + "OutputColumns": [ + { + "Name": "location_name", + "Type": "STRING" + }, + { + "Name": "opened_date", + "Type": "DATETIME" + }, + { + "Name": "order_total", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "tax_paid", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "ordered_at", + "Type": "DATETIME" + }, + { + "Name": "order_cost", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "customer_name", + "Type": "STRING" + }, + { + "Name": "customer_type", + "Type": "STRING" + }, + { + "Name": "revenue", + "Type": "DECIMAL", + "SubType": "FIXED" + } + ], + "ImportMode": "SPICE", + "ConsumedSpiceCapacityInBytes": 10548419, + "FieldFolders": {}, + "DataSetUsageConfiguration": { + "DisableUseAsDirectQuerySource": false, + "DisableUseAsImportedSource": false + } + }, + "RequestId": "084a0be5-30bb-4a0e-9175-756a44ea021c" + }, + { + "ResponseMetadata": { + "RequestId": "ce96b506-6d79-41b7-bdfe-0a9ad146ac6b", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:07 GMT", + "content-type": "application/json", + "content-length": "4260", + "connection": "keep-alive", + "x-amzn-requestid": "ce96b506-6d79-41b7-bdfe-0a9ad146ac6b" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "DataSet": { + "Arn": "arn:aws:quicksight:us-west-2:123456789012:dataset/7bcddd7f-ed98-4e91-8064-9ae885f0376a", + "DataSetId": "7bcddd7f-ed98-4e91-8064-9ae885f0376a", + "Name": "sample_sales_records", + "CreatedTime": "2024-09-12 20:59:05.938000+08:00", + "LastUpdatedTime": "2024-09-19 12:20:38.550000+08:00", + "PhysicalTableMap": { + "501bd127-d6a3-45ac-843c-6fee1ccb1aad": { + "RelationalTable": { + "DataSourceArn": "arn:aws:quicksight:us-west-2:123456789012:datasource/d2603795-6ef1-403a-84f7-898fabf9458c", + "Schema": "public", + "Name": "sample_sales_records", + "InputColumns": [ + { + "Name": "region", + "Type": "STRING" + }, + { + "Name": "country", + "Type": "STRING" + }, + { + "Name": "item type", + "Type": "STRING" + }, + { + "Name": "sales channel", + "Type": "STRING" + }, + { + "Name": "order priority", + "Type": "STRING" + }, + { + "Name": "order date", + "Type": "STRING" + }, + { + "Name": "order id", + "Type": "INTEGER" + }, + { + "Name": "ship date", + "Type": "STRING" + }, + { + "Name": "units sold", + "Type": "INTEGER" + }, + { + "Name": "unit price", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "unit cost", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "total revenue", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "total cost", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "total profit", + "Type": "DECIMAL", + "SubType": "FIXED" + } + ] + } + } + }, + "LogicalTableMap": { + "0d6f3fd0-9d90-4e5e-b22f-8a95324ddc19": { + "Alias": "sample_sales_records", + "DataTransforms": [ + { + "TagColumnOperation": { + "ColumnName": "region", + "Tags": [ + { + "ColumnGeographicRole": "STATE" + } + ] + } + }, + { + "TagColumnOperation": { + "ColumnName": "country", + "Tags": [ + { + "ColumnGeographicRole": "COUNTRY" + } + ] + } + } + ], + "Source": { + "PhysicalTableId": "501bd127-d6a3-45ac-843c-6fee1ccb1aad" + } + } + }, + "OutputColumns": [ + { + "Name": "region", + "Type": "STRING" + }, + { + "Name": "country", + "Type": "STRING" + }, + { + "Name": "item type", + "Type": "STRING" + }, + { + "Name": "sales channel", + "Type": "STRING" + }, + { + "Name": "order priority", + "Type": "STRING" + }, + { + "Name": "order date", + "Type": "STRING" + }, + { + "Name": "order id", + "Type": "INTEGER" + }, + { + "Name": "ship date", + "Type": "STRING" + }, + { + "Name": "units sold", + "Type": "INTEGER" + }, + { + "Name": "unit price", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "unit cost", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "total revenue", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "total cost", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "total profit", + "Type": "DECIMAL", + "SubType": "FIXED" + } + ], + "ImportMode": "SPICE", + "ConsumedSpiceCapacityInBytes": 31074367, + "DataSetUsageConfiguration": { + "DisableUseAsDirectQuerySource": false, + "DisableUseAsImportedSource": false + } + }, + "RequestId": "ce96b506-6d79-41b7-bdfe-0a9ad146ac6b" + }, + { + "ResponseMetadata": { + "RequestId": "dcae33c7-adbc-4aa9-bbaa-c6b91f4da51f", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:07 GMT", + "content-type": "application/json", + "content-length": "3749", + "connection": "keep-alive", + "x-amzn-requestid": "dcae33c7-adbc-4aa9-bbaa-c6b91f4da51f" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "DataSet": { + "Arn": "arn:aws:quicksight:us-west-2:123456789012:dataset/7c6a5c47-fbc7-4307-afd3-57f79864593e", + "DataSetId": "7c6a5c47-fbc7-4307-afd3-57f79864593e", + "Name": "Bike data", + "CreatedTime": "2024-09-18 16:17:33.463000+08:00", + "LastUpdatedTime": "2024-09-18 16:42:39.096000+08:00", + "PhysicalTableMap": { + "1b016641-23c2-4b17-ab94-c773333bc76d": { + "CustomSql": { + "DataSourceArn": "arn:aws:quicksight:us-west-2:123456789012:datasource/01d81128-094e-4f2c-93c5-6d9f2c049ecd", + "Name": "Station Docks", + "SqlQuery": "SELECT docks_count, id FROM RIDE_SHARE.RAW_BIKE_STATIONS", + "Columns": [ + { + "Name": "DOCKS_COUNT", + "Type": "INTEGER" + }, + { + "Name": "ID", + "Type": "INTEGER" + } + ] + } + }, + "2a463fad-08c9-4a63-9aab-a786f1b41752": { + "CustomSql": { + "DataSourceArn": "arn:aws:quicksight:us-west-2:123456789012:datasource/01d81128-094e-4f2c-93c5-6d9f2c049ecd", + "Name": "Total Minutes By Start", + "SqlQuery": "SELECT total_minutes, start_station_name, month, start_station_id FROM RIDE_SHARE.CLEANED_BIKE_RIDES", + "Columns": [ + { + "Name": "TOTAL_MINUTES", + "Type": "DECIMAL" + }, + { + "Name": "START_STATION_NAME", + "Type": "STRING" + }, + { + "Name": "MONTH", + "Type": "INTEGER" + }, + { + "Name": "START_STATION_ID", + "Type": "INTEGER" + } + ] + } + } + }, + "LogicalTableMap": { + "12f0dcb4-ff96-4123-8568-00781596eb37": { + "Alias": "Station Docks", + "Source": { + "PhysicalTableId": "1b016641-23c2-4b17-ab94-c773333bc76d" + } + }, + "82e644be-26ce-44a0-bbc9-95cc88e16a5c": { + "Alias": "Total Minutes By Start", + "Source": { + "PhysicalTableId": "2a463fad-08c9-4a63-9aab-a786f1b41752" + } + }, + "8c374d17-42ce-46d8-9168-84ec82670b97": { + "Alias": "Intermediate Table", + "DataTransforms": [ + { + "ProjectOperation": { + "ProjectedColumns": [ + "TOTAL_MINUTES", + "START_STATION_NAME", + "MONTH", + "START_STATION_ID", + "DOCKS_COUNT" + ] + } + } + ], + "Source": { + "JoinInstruction": { + "LeftOperand": "82e644be-26ce-44a0-bbc9-95cc88e16a5c", + "RightOperand": "12f0dcb4-ff96-4123-8568-00781596eb37", + "RightJoinKeyProperties": { + "UniqueKey": true + }, + "Type": "LEFT", + "OnClause": "{START_STATION_ID} = {ID}" + } + } + } + }, + "OutputColumns": [ + { + "Name": "TOTAL_MINUTES", + "Type": "DECIMAL" + }, + { + "Name": "START_STATION_NAME", + "Type": "STRING" + }, + { + "Name": "MONTH", + "Type": "INTEGER" + }, + { + "Name": "START_STATION_ID", + "Type": "INTEGER" + }, + { + "Name": "DOCKS_COUNT", + "Type": "INTEGER" + } + ], + "ImportMode": "DIRECT_QUERY", + "ConsumedSpiceCapacityInBytes": 0, + "FieldFolders": {}, + "DataSetUsageConfiguration": { + "DisableUseAsDirectQuerySource": false, + "DisableUseAsImportedSource": false + } + }, + "RequestId": "dcae33c7-adbc-4aa9-bbaa-c6b91f4da51f" + }, + { + "ResponseMetadata": { + "RequestId": "08dfb8bf-a945-4303-b98c-6356cc7086c6", + "HTTPStatusCode": 200, + "HTTPHeaders": { + "date": "Thu, 19 Sep 2024 10:34:07 GMT", + "content-type": "application/json", + "content-length": "5236", + "connection": "keep-alive", + "x-amzn-requestid": "08dfb8bf-a945-4303-b98c-6356cc7086c6" + }, + "RetryAttempts": 0 + }, + "Status": 200, + "DataSet": { + "Arn": "arn:aws:quicksight:us-west-2:123456789012:dataset/fb1b23e7-ff1f-47b7-a04e-33b30847e9a7", + "DataSetId": "fb1b23e7-ff1f-47b7-a04e-33b30847e9a7", + "Name": "orders", + "CreatedTime": "2024-09-18 15:59:12.164000+08:00", + "LastUpdatedTime": "2024-09-18 16:09:07.953000+08:00", + "PhysicalTableMap": { + "48cf151b-0e89-4707-a901-871f69b22017": { + "RelationalTable": { + "DataSourceArn": "arn:aws:quicksight:us-west-2:123456789012:datasource/7e079648-2501-423a-be75-9ee19f351aad", + "Schema": "jaffle_shop", + "Name": "customers", + "InputColumns": [ + { + "Name": "customer_id", + "Type": "STRING" + }, + { + "Name": "customer_name", + "Type": "STRING" + }, + { + "Name": "count_lifetime_orders", + "Type": "INTEGER" + }, + { + "Name": "first_ordered_at", + "Type": "DATETIME" + }, + { + "Name": "last_ordered_at", + "Type": "DATETIME" + }, + { + "Name": "lifetime_spend_pretax", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "lifetime_spend", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "customer_type", + "Type": "STRING" + } + ] + } + }, + "4e4695f3-6178-447b-947e-289363740a82": { + "RelationalTable": { + "DataSourceArn": "arn:aws:quicksight:us-west-2:123456789012:datasource/7e079648-2501-423a-be75-9ee19f351aad", + "Schema": "jaffle_shop", + "Name": "orders", + "InputColumns": [ + { + "Name": "order_id", + "Type": "STRING" + }, + { + "Name": "location_id", + "Type": "STRING" + }, + { + "Name": "customer_id", + "Type": "STRING" + }, + { + "Name": "order_total", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "tax_paid", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "ordered_at", + "Type": "DATETIME" + }, + { + "Name": "is_food_order", + "Type": "BIT" + }, + { + "Name": "is_drink_order", + "Type": "BIT" + }, + { + "Name": "order_cost", + "Type": "DECIMAL", + "SubType": "FIXED" + } + ] + } + } + }, + "LogicalTableMap": { + "8f75cca0-6d39-48bd-9c4a-9d3a7015562e": { + "Alias": "Intermediate Table", + "DataTransforms": [ + { + "ProjectOperation": { + "ProjectedColumns": [ + "order_id", + "location_id", + "customer_id", + "order_total", + "tax_paid", + "ordered_at", + "is_food_order", + "is_drink_order", + "order_cost", + "customer_name", + "customer_type" + ] + } + } + ], + "Source": { + "JoinInstruction": { + "LeftOperand": "a7b9ddbf-9fdf-48df-8952-b75b972fcc5d", + "RightOperand": "fb24929a-5175-4485-b934-5210fbd09dae", + "Type": "LEFT", + "OnClause": "{customer_id} = {customer_id[customers]}" + } + } + }, + "a7b9ddbf-9fdf-48df-8952-b75b972fcc5d": { + "Alias": "orders", + "Source": { + "PhysicalTableId": "4e4695f3-6178-447b-947e-289363740a82" + } + }, + "fb24929a-5175-4485-b934-5210fbd09dae": { + "Alias": "customers", + "DataTransforms": [ + { + "RenameColumnOperation": { + "ColumnName": "customer_id", + "NewColumnName": "customer_id[customers]" + } + } + ], + "Source": { + "PhysicalTableId": "48cf151b-0e89-4707-a901-871f69b22017" + } + } + }, + "OutputColumns": [ + { + "Name": "order_id", + "Type": "STRING" + }, + { + "Name": "location_id", + "Type": "STRING" + }, + { + "Name": "customer_id", + "Type": "STRING" + }, + { + "Name": "order_total", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "tax_paid", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "ordered_at", + "Type": "DATETIME" + }, + { + "Name": "is_food_order", + "Type": "INTEGER" + }, + { + "Name": "is_drink_order", + "Type": "INTEGER" + }, + { + "Name": "order_cost", + "Type": "DECIMAL", + "SubType": "FIXED" + }, + { + "Name": "customer_name", + "Type": "STRING" + }, + { + "Name": "customer_type", + "Type": "STRING" + } + ], + "ImportMode": "SPICE", + "ConsumedSpiceCapacityInBytes": 19224167, + "FieldFolders": {}, + "DataSetUsageConfiguration": { + "DisableUseAsDirectQuerySource": false, + "DisableUseAsImportedSource": false + } + }, + "RequestId": "08dfb8bf-a945-4303-b98c-6356cc7086c6" + } +] diff --git a/tests/quick_sight/expected.json b/tests/quick_sight/expected.json new file mode 100644 index 00000000..f2557a61 --- /dev/null +++ b/tests/quick_sight/expected.json @@ -0,0 +1,709 @@ +[ + { + "entityUpstream": { + "fieldMappings": [ + { + "destination": "order_total", + "sources": [ + { + "field": "order_total", + "sourceEntityId": "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + } + ] + }, + { + "destination": "tax_paid", + "sources": [ + { + "field": "tax_paid", + "sourceEntityId": "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + } + ] + }, + { + "destination": "ordered_at", + "sources": [ + { + "field": "ordered_at", + "sourceEntityId": "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + } + ] + }, + { + "destination": "order_cost", + "sources": [ + { + "field": "order_cost", + "sourceEntityId": "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + } + ] + }, + { + "destination": "customer_name", + "sources": [ + { + "field": "customer_name", + "sourceEntityId": "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + } + ] + }, + { + "destination": "customer_type", + "sources": [ + { + "field": "customer_type", + "sourceEntityId": "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + } + ] + }, + { + "destination": "revenue", + "sources": [] + }, + { + "destination": "location_id", + "sources": [ + { + "field": "location_id", + "sourceEntityId": "DATASET~D35648E120DEEA5750A87336C6AA2D25" + } + ] + }, + { + "destination": "location_name", + "sources": [ + { + "field": "location_name", + "sourceEntityId": "DATASET~D35648E120DEEA5750A87336C6AA2D25" + } + ] + }, + { + "destination": "tax_rate", + "sources": [ + { + "field": "tax_rate", + "sourceEntityId": "DATASET~D35648E120DEEA5750A87336C6AA2D25" + } + ] + }, + { + "destination": "opened_date", + "sources": [ + { + "field": "opened_date", + "sourceEntityId": "DATASET~D35648E120DEEA5750A87336C6AA2D25" + } + ] + } + ], + "sourceEntities": [ + "DATASET~D35648E120DEEA5750A87336C6AA2D25", + "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + ] + }, + "logicalId": { + "name": "arn:aws:quicksight:us-west-2:123456789012:dataset/6f516e19-84f8-4d17-9bd9-feecf1bdc346", + "type": "QUICK_SIGHT" + }, + "schema": { + "fields": [ + { + "fieldName": "location_name", + "fieldPath": "location_name", + "type": "STRING" + }, + { + "fieldName": "opened_date", + "fieldPath": "opened_date", + "type": "DATETIME" + }, + { + "fieldName": "order_total", + "fieldPath": "order_total", + "type": "DECIMAL" + }, + { + "fieldName": "tax_paid", + "fieldPath": "tax_paid", + "type": "DECIMAL" + }, + { + "fieldName": "ordered_at", + "fieldPath": "ordered_at", + "type": "DATETIME" + }, + { + "fieldName": "order_cost", + "fieldPath": "order_cost", + "type": "DECIMAL" + }, + { + "fieldName": "customer_name", + "fieldPath": "customer_name", + "type": "STRING" + }, + { + "fieldName": "customer_type", + "fieldPath": "customer_type", + "type": "STRING" + }, + { + "fieldName": "revenue", + "fieldPath": "revenue", + "formula": "{order_total} - {order_cost}", + "optionalType": "FORMULA", + "type": "DECIMAL" + } + ] + }, + "sourceInfo": { + "createdAtSource": "2024-09-19T16:59:00.434000+08:00", + "lastUpdated": "2024-09-19T17:30:15.605000+08:00" + }, + "structure": { + "name": "locations" + } + }, + { + "entityUpstream": { + "fieldMappings": [ + { + "destination": "region", + "sources": [ + { + "field": "region", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "country", + "sources": [ + { + "field": "country", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "item type", + "sources": [ + { + "field": "item type", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "sales channel", + "sources": [ + { + "field": "sales channel", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "order priority", + "sources": [ + { + "field": "order priority", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "order date", + "sources": [ + { + "field": "order date", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "order id", + "sources": [ + { + "field": "order id", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "ship date", + "sources": [ + { + "field": "ship date", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "units sold", + "sources": [ + { + "field": "units sold", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "unit price", + "sources": [ + { + "field": "unit price", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "unit cost", + "sources": [ + { + "field": "unit cost", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "total revenue", + "sources": [ + { + "field": "total revenue", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "total cost", + "sources": [ + { + "field": "total cost", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + }, + { + "destination": "total profit", + "sources": [ + { + "field": "total profit", + "sourceEntityId": "DATASET~B639ACB48EEE0795206A61803BC37DDF" + } + ] + } + ], + "sourceEntities": [ + "DATASET~B639ACB48EEE0795206A61803BC37DDF" + ] + }, + "logicalId": { + "name": "arn:aws:quicksight:us-west-2:123456789012:dataset/7bcddd7f-ed98-4e91-8064-9ae885f0376a", + "type": "QUICK_SIGHT" + }, + "schema": { + "fields": [ + { + "fieldName": "region", + "fieldPath": "region", + "type": "STRING" + }, + { + "fieldName": "country", + "fieldPath": "country", + "type": "STRING" + }, + { + "fieldName": "item type", + "fieldPath": "item type", + "type": "STRING" + }, + { + "fieldName": "sales channel", + "fieldPath": "sales channel", + "type": "STRING" + }, + { + "fieldName": "order priority", + "fieldPath": "order priority", + "type": "STRING" + }, + { + "fieldName": "order date", + "fieldPath": "order date", + "type": "STRING" + }, + { + "fieldName": "order id", + "fieldPath": "order id", + "type": "INTEGER" + }, + { + "fieldName": "ship date", + "fieldPath": "ship date", + "type": "STRING" + }, + { + "fieldName": "units sold", + "fieldPath": "units sold", + "type": "INTEGER" + }, + { + "fieldName": "unit price", + "fieldPath": "unit price", + "type": "DECIMAL" + }, + { + "fieldName": "unit cost", + "fieldPath": "unit cost", + "type": "DECIMAL" + }, + { + "fieldName": "total revenue", + "fieldPath": "total revenue", + "type": "DECIMAL" + }, + { + "fieldName": "total cost", + "fieldPath": "total cost", + "type": "DECIMAL" + }, + { + "fieldName": "total profit", + "fieldPath": "total profit", + "type": "DECIMAL" + } + ] + }, + "sourceInfo": { + "createdAtSource": "2024-09-12T20:59:05.938000+08:00", + "lastUpdated": "2024-09-19T12:20:38.550000+08:00" + }, + "structure": { + "name": "sample_sales_records" + } + }, + { + "entityUpstream": { + "fieldMappings": [ + { + "destination": "docks_count", + "sources": [] + }, + { + "destination": "total_minutes", + "sources": [] + }, + { + "destination": "start_station_name", + "sources": [] + }, + { + "destination": "month", + "sources": [] + }, + { + "destination": "start_station_id", + "sources": [] + } + ], + "sourceEntities": [ + "DATASET~83E8304683CD6C30CA41557A39C4DF25", + "DATASET~E2EB9491F5BDD97D1591D2454917F450" + ] + }, + "logicalId": { + "name": "arn:aws:quicksight:us-west-2:123456789012:dataset/7c6a5c47-fbc7-4307-afd3-57f79864593e", + "type": "QUICK_SIGHT" + }, + "schema": { + "fields": [ + { + "fieldName": "TOTAL_MINUTES", + "fieldPath": "total_minutes", + "type": "DECIMAL" + }, + { + "fieldName": "START_STATION_NAME", + "fieldPath": "start_station_name", + "type": "STRING" + }, + { + "fieldName": "MONTH", + "fieldPath": "month", + "type": "INTEGER" + }, + { + "fieldName": "START_STATION_ID", + "fieldPath": "start_station_id", + "type": "INTEGER" + }, + { + "fieldName": "DOCKS_COUNT", + "fieldPath": "docks_count", + "type": "INTEGER" + } + ] + }, + "sourceInfo": { + "createdAtSource": "2024-09-18T16:17:33.463000+08:00", + "lastUpdated": "2024-09-18T16:42:39.096000+08:00" + }, + "structure": { + "name": "Bike data" + } + }, + { + "entityUpstream": { + "fieldMappings": [ + { + "destination": "order_id", + "sources": [ + { + "field": "order_id", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "location_id", + "sources": [ + { + "field": "location_id", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "order_total", + "sources": [ + { + "field": "order_total", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "tax_paid", + "sources": [ + { + "field": "tax_paid", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "ordered_at", + "sources": [ + { + "field": "ordered_at", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "is_food_order", + "sources": [ + { + "field": "is_food_order", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "is_drink_order", + "sources": [ + { + "field": "is_drink_order", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "order_cost", + "sources": [ + { + "field": "order_cost", + "sourceEntityId": "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5" + } + ] + }, + { + "destination": "customer_name", + "sources": [ + { + "field": "customer_name", + "sourceEntityId": "DATASET~F6D22C1B6C06D037407B74D2D708058E" + } + ] + }, + { + "destination": "customer_type", + "sources": [ + { + "field": "customer_type", + "sourceEntityId": "DATASET~F6D22C1B6C06D037407B74D2D708058E" + } + ] + } + ], + "sourceEntities": [ + "DATASET~1D56ACA83CDEEDC47163BA86B7ADF5C5", + "DATASET~F6D22C1B6C06D037407B74D2D708058E" + ] + }, + "logicalId": { + "name": "arn:aws:quicksight:us-west-2:123456789012:dataset/fb1b23e7-ff1f-47b7-a04e-33b30847e9a7", + "type": "QUICK_SIGHT" + }, + "schema": { + "fields": [ + { + "fieldName": "order_id", + "fieldPath": "order_id", + "type": "STRING" + }, + { + "fieldName": "location_id", + "fieldPath": "location_id", + "type": "STRING" + }, + { + "fieldName": "customer_id", + "fieldPath": "customer_id", + "type": "STRING" + }, + { + "fieldName": "order_total", + "fieldPath": "order_total", + "type": "DECIMAL" + }, + { + "fieldName": "tax_paid", + "fieldPath": "tax_paid", + "type": "DECIMAL" + }, + { + "fieldName": "ordered_at", + "fieldPath": "ordered_at", + "type": "DATETIME" + }, + { + "fieldName": "is_food_order", + "fieldPath": "is_food_order", + "type": "INTEGER" + }, + { + "fieldName": "is_drink_order", + "fieldPath": "is_drink_order", + "type": "INTEGER" + }, + { + "fieldName": "order_cost", + "fieldPath": "order_cost", + "type": "DECIMAL" + }, + { + "fieldName": "customer_name", + "fieldPath": "customer_name", + "type": "STRING" + }, + { + "fieldName": "customer_type", + "fieldPath": "customer_type", + "type": "STRING" + } + ] + }, + "sourceInfo": { + "createdAtSource": "2024-09-18T15:59:12.164000+08:00", + "lastUpdated": "2024-09-18T16:09:07.953000+08:00" + }, + "structure": { + "name": "orders" + } + }, + { + "dashboardInfo": { + "charts": [ + { + "chartType": "OTHER", + "title": "Revenue and Cost by Country" + } + ], + "title": "Sales" + }, + "entityUpstream": { + "sourceEntities": [ + "VIRTUAL_VIEW~C791C9EC7B680B3BAF65B79D1765219B" + ] + }, + "logicalId": { + "dashboardId": "arn:aws:quicksight:us-west-2:123456789012:dashboard/1c9b51b1-3204-4ee3-9491-e92ddded8518", + "platform": "QUICK_SIGHT" + }, + "sourceInfo": { + "createdAtSource": "2024-09-12T21:01:13.249000+08:00", + "lastUpdated": "2024-09-12T21:01:13.244000+08:00" + }, + "structure": { + "name": "Sales" + } + }, + { + "dashboardInfo": { + "charts": [ + { + "chartType": "OTHER", + "title": "Relation between minutes and docks" + } + ], + "title": "Bike rides" + }, + "entityUpstream": { + "sourceEntities": [ + "VIRTUAL_VIEW~042F9509D727CB7FE827A0FF0AD9DDEE" + ] + }, + "logicalId": { + "dashboardId": "arn:aws:quicksight:us-west-2:123456789012:dashboard/eb39c0d9-d071-43e6-b75a-cc303752b702", + "platform": "QUICK_SIGHT" + }, + "sourceInfo": { + "createdAtSource": "2024-09-18T16:24:00.929000+08:00", + "lastUpdated": "2024-09-18T16:24:00.923000+08:00" + }, + "structure": { + "name": "Bike rides" + } + }, + { + "dashboardInfo": { + "charts": [ + { + "chartType": "OTHER", + "title": "Sheet 1" + } + ], + "title": "Jaffle Shop Orders" + }, + "entityUpstream": { + "sourceEntities": [ + "VIRTUAL_VIEW~543D1D7F1F0469635E8A454303BA2913" + ] + }, + "logicalId": { + "dashboardId": "arn:aws:quicksight:us-west-2:123456789012:dashboard/fea7f6ce-ac23-44f4-bf9b-fb03b535403e", + "platform": "QUICK_SIGHT" + }, + "sourceInfo": { + "createdAtSource": "2024-09-18T16:09:35.235000+08:00", + "lastUpdated": "2024-09-18T16:09:35.228000+08:00" + }, + "structure": { + "name": "Jaffle Shop Orders" + } + } +] diff --git a/tests/quick_sight/test_data_source_utils.py b/tests/quick_sight/test_data_source_utils.py new file mode 100644 index 00000000..8f5a5942 --- /dev/null +++ b/tests/quick_sight/test_data_source_utils.py @@ -0,0 +1,129 @@ +import metaphor.quick_sight.models as models +from metaphor.quick_sight.data_source_utils import get_account, get_database + + +def test_utils(): + aurora_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + AuroraParameters=models.TypeAuroraParameters( + Database="db", + Host="host", + Port=123, + ) + ) + ) + + aurora_postgres_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + AuroraPostgreSqlParameters=models.TypeAuroraPostgreSqlParameters( + Database="db", + Host="host", + Port=123, + ) + ) + ) + + bigquery_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + BigQueryParameters=models.TypeBigQueryParameters(ProjectId="project") + ) + ) + + maria_db_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + MariaDbParameters=models.TypeMariaDbParameters( + Database="db", + Host="host", + Port=123, + ) + ) + ) + + mysql_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + MySqlParameters=models.TypeMySqlParameters( + Database="db", + Host="host", + Port=123, + ) + ) + ) + + oracle_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + OracleParameters=models.TypeOracleParameters( + Database="db", + Host="host", + Port=123, + ) + ) + ) + + postgres_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + PostgreSqlParameters=models.TypePostgreSqlParameters( + Database="db", + Host="host", + Port=123, + ) + ) + ) + + rds_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + RdsParameters=models.TypeRdsParameters( + Database="db", + InstanceId="123123801", + ) + ) + ) + + redshift_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + RedshiftParameters=models.TypeRedshiftParameters( + Database="db", Host="host", Port=123 + ) + ) + ) + + snowflake_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + SnowflakeParameters=models.TypeSnowflakeParameters( + Database="db", Host="account.snowflakecomputing.com" + ) + ) + ) + + sql_server_source = models.DataSource( + DataSourceParameters=models.TypeDataSourceParameters( + SqlServerParameters=models.TypeSqlServerParameters( + Database="db", Host="host", Port=123 + ) + ) + ) + + assert get_database(models.DataSource()) is None + assert get_database(aurora_source) == "db" + assert get_database(aurora_postgres_source) == "db" + assert get_database(bigquery_source) == "project" + assert get_database(maria_db_source) == "db" + assert get_database(mysql_source) == "db" + assert get_database(oracle_source) == "db" + assert get_database(postgres_source) == "db" + assert get_database(rds_source) == "db" + assert get_database(redshift_source) == "db" + assert get_database(snowflake_source) == "db" + assert get_database(sql_server_source) == "db" + + assert get_account(models.DataSource()) is None + assert get_account(aurora_source) == "host" + assert get_account(aurora_postgres_source) is None + assert get_account(bigquery_source) is None + assert get_account(maria_db_source) == "host" + assert get_account(mysql_source) == "host" + assert get_account(oracle_source) == "host" + assert get_account(postgres_source) is None + assert get_account(rds_source) is None + assert get_account(redshift_source) is None + assert get_account(snowflake_source) == "account" + assert get_account(sql_server_source) == "host" diff --git a/tests/quick_sight/test_extractor.py b/tests/quick_sight/test_extractor.py new file mode 100644 index 00000000..6a611207 --- /dev/null +++ b/tests/quick_sight/test_extractor.py @@ -0,0 +1,103 @@ +from unittest.mock import MagicMock, patch + +import pytest + +from metaphor.common.base_config import OutputConfig +from metaphor.common.event_util import EventUtil +from metaphor.quick_sight.config import AwsCredentials, QuickSightRunConfig +from metaphor.quick_sight.extractor import QuickSightExtractor +from tests.test_utils import load_json + + +def dummy_config(): + return QuickSightRunConfig( + aws=AwsCredentials( + access_key_id="key", secret_access_key="secret", region_name="region" + ), + aws_account_id=123, + output=OutputConfig(), + ) + + +@patch("metaphor.quick_sight.client.create_quick_sight_client") +@pytest.mark.asyncio +async def test_extractor(mock_create_client: MagicMock, test_root_dir: str): + datasets_response = load_json(f"{test_root_dir}/quick_sight/data/datasets.json") + dashboards_response = load_json(f"{test_root_dir}/quick_sight/data/dashboards.json") + data_sources_response = load_json( + f"{test_root_dir}/quick_sight/data/data_sources.json" + ) + + list_data_sets_response = [ + { + "DataSetSummaries": [ + {"DataSetId": item["DataSet"]["DataSetId"]} + for item in datasets_response + ] + } + ] + + list_dashboards_response = [ + { + "DashboardSummaryList": [ + {"DashboardId": item["Dashboard"]["DashboardId"]} + for item in dashboards_response + ] + } + ] + + list_data_sources_response = [ + { + "DataSources": [ + {"DataSourceId": item["DataSource"]["DataSourceId"]} + for item in data_sources_response + ] + } + ] + + def mock_describe_data_set(DataSetId: str, AwsAccountId): + return next( + item + for item in datasets_response + if item["DataSet"]["DataSetId"] == DataSetId + ) + + def mock_describe_dashboard(DashboardId: str, AwsAccountId): + return next( + item + for item in dashboards_response + if item["Dashboard"]["DashboardId"] == DashboardId + ) + + def mock_describe_data_source(DataSourceId: str, AwsAccountId): + return next( + item + for item in data_sources_response + if item["DataSource"]["DataSourceId"] == DataSourceId + ) + + def mock_get_paginator(method: str): + if method == "list_data_sets": + mock_paginator = MagicMock() + mock_paginator.paginate.return_value = list_data_sets_response + return mock_paginator + elif method == "list_dashboards": + mock_paginator = MagicMock() + mock_paginator.paginate.return_value = list_dashboards_response + return mock_paginator + elif method == "list_data_sources": + mock_paginator = MagicMock() + mock_paginator.paginate.return_value = list_data_sources_response + return mock_paginator + + mock_client = MagicMock() + mock_client.get_paginator = mock_get_paginator + mock_client.describe_data_set = mock_describe_data_set + mock_client.describe_dashboard = mock_describe_dashboard + mock_client.describe_data_source = mock_describe_data_source + mock_create_client.return_value = mock_client + + extractor = QuickSightExtractor(dummy_config()) + events = [EventUtil.trim_event(e) for e in await extractor.extract()] + + assert events == load_json(f"{test_root_dir}/quick_sight/expected.json")