-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Inital implementation * Remove unuse code * Update poetry * Bump version * Update readme * Update readme * Update readme * Improve readability * Add test * Update metaphor/quick_sight/extractor.py Co-authored-by: Tsung-Ju Lii <[email protected]> * Update metaphor/quick_sight/extractor.py Co-authored-by: Tsung-Ju Lii <[email protected]> * Update metaphor/quick_sight/cll.py Co-authored-by: Tsung-Ju Lii <[email protected]> * Refine --------- Co-authored-by: Tsung-Ju Lii <[email protected]>
- Loading branch information
1 parent
a036ba5
commit 167ccb7
Showing
21 changed files
with
3,127 additions
and
28 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,21 @@ | ||
PRIVATE_LINK_SUFFIX = ".privatelink" | ||
SNOWFLAKE_HOST_SUFFIX = ".snowflakecomputing.com" | ||
|
||
|
||
def normalize_snowflake_account(account: str) -> str: | ||
def normalize_snowflake_account(host: str) -> str: | ||
""" | ||
Normalize different variations of Snowflake account. | ||
See https://docs.snowflake.com/en/user-guide/admin-account-identifier | ||
""" | ||
|
||
# Account name is case insensitive | ||
account = account.lower() | ||
host = host.lower() | ||
|
||
if host.endswith(SNOWFLAKE_HOST_SUFFIX): | ||
host = host[: -len(SNOWFLAKE_HOST_SUFFIX)] | ||
|
||
# Strip PrivateLink suffix | ||
if account.endswith(PRIVATE_LINK_SUFFIX): | ||
return account[: -len(PRIVATE_LINK_SUFFIX)] | ||
if host.endswith(PRIVATE_LINK_SUFFIX): | ||
return host[: -len(PRIVATE_LINK_SUFFIX)] | ||
|
||
return account | ||
return host |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
# QuickSight Connector | ||
|
||
This connector extracts technical metadata from AWS QuickSight using the [boto3](https://boto3.amazonaws.com/v1/documentation/api/latest/index.html) library. | ||
|
||
## Setup | ||
|
||
We recommend creating a dedicated AWS IAM user for the crawler with limited permissions based on the following IAM policy: | ||
|
||
``` json | ||
{ | ||
"Version": "2012-10-17", | ||
"Statement": | ||
[ | ||
{ | ||
"Effect": "Allow", | ||
"Action": | ||
[ | ||
"quicksight:DescribeAnalysis", | ||
"quicksight:DescribeDashboard", | ||
"quicksight:DescribeDataSource", | ||
"quicksight:DescribeDataSet", | ||
"quicksight:DescribeDataSetRefreshProperties", | ||
"quicksight:DescribeFolder", | ||
"quicksight:DescribeUser", | ||
"quicksight:ListAnalyses", | ||
"quicksight:ListDashboards", | ||
"quicksight:ListDataSources", | ||
"quicksight:ListDataSets", | ||
"quicksight:ListFolders", | ||
"quicksight:ListUsers", | ||
], | ||
"Resource": | ||
[ | ||
"*" | ||
] | ||
} | ||
] | ||
} | ||
``` | ||
|
||
## Config File | ||
|
||
Create a YAML config file based on the following template. | ||
|
||
### Required Configurations | ||
|
||
You must specify an AWS user credential to access QuickSight API. You can also specify a role ARN and let the connector assume the role before accessing AWS APIs. | ||
|
||
```yaml | ||
aws: | ||
access_key_id: <aws_access_key_id> | ||
secret_access_key: <aws_secret_access_key> | ||
region_name: <aws_region_name> | ||
assume_role_arn: <aws_role_arn> # If using IAM role | ||
aws_account_id: <quick_aws_account_id> | ||
``` | ||
### Optional Configurations | ||
#### Output Destination | ||
See [Output Config](../common/docs/output.md) for more information. | ||
## Testing | ||
Follow the [Installation](../../README.md) instructions to install `metaphor-connectors` in your environment (or virtualenv). | ||
|
||
Run the following command to test the connector locally: | ||
|
||
```shell | ||
metaphor quick_sight <config_file> | ||
``` | ||
|
||
Manually verify the output after the run finishes. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from metaphor.common.cli import cli_main | ||
from metaphor.quick_sight.extractor import QuickSightExtractor | ||
|
||
|
||
def main(config_file: str): | ||
cli_main(QuickSightExtractor, config_file) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import enum | ||
from typing import Dict, List | ||
|
||
import boto3 | ||
from pydantic.dataclasses import dataclass | ||
|
||
from metaphor.common.aws import AwsCredentials | ||
from metaphor.common.logger import json_dump_to_debug_file | ||
from metaphor.quick_sight.models import Dashboard, DataSet, DataSource, ResourceType | ||
|
||
|
||
def create_quick_sight_client(aws: AwsCredentials) -> boto3.client: | ||
return aws.get_session().client("quicksight") | ||
|
||
|
||
class Endpoint(enum.Enum): | ||
list_dashboards = "list_dashboards" | ||
list_data_sets = "list_data_sets" | ||
list_data_sources = "list_data_sources" | ||
|
||
|
||
@dataclass | ||
class EndpointDictKeys: | ||
list_key: str | ||
item_key: str | ||
|
||
|
||
ENDPOINT_SETTING = { | ||
Endpoint.list_data_sets: EndpointDictKeys("DataSetSummaries", "DataSetId"), | ||
Endpoint.list_dashboards: EndpointDictKeys("DashboardSummaryList", "DashboardId"), | ||
Endpoint.list_data_sources: EndpointDictKeys("DataSources", "DataSourceId"), | ||
} | ||
|
||
|
||
class Client: | ||
def __init__( | ||
self, | ||
aws: AwsCredentials, | ||
aws_account_id: str, | ||
resources: Dict[str, ResourceType], | ||
): | ||
self._client = create_quick_sight_client(aws) | ||
self._aws_account_id = aws_account_id | ||
self._resources = resources | ||
|
||
def get_resources(self): | ||
self._get_dataset_detail() | ||
self._get_dashboard_detail() | ||
self._get_data_source_detail() | ||
|
||
def _get_resource_ids(self, endpoint: Endpoint) -> List[str]: | ||
paginator = self._client.get_paginator(endpoint.value) | ||
paginator_response = paginator.paginate(AwsAccountId=self._aws_account_id) | ||
|
||
ids = [] | ||
settings = ENDPOINT_SETTING[endpoint] | ||
for page in paginator_response: | ||
for item in page[settings.list_key]: | ||
ids.append(item[settings.item_key]) | ||
return ids | ||
|
||
def _get_dataset_detail(self) -> None: | ||
results = [] | ||
for dataset_id in self._get_resource_ids(Endpoint.list_data_sets): | ||
result = self._client.describe_data_set( | ||
AwsAccountId=self._aws_account_id, DataSetId=dataset_id | ||
) | ||
|
||
results.append(result) | ||
|
||
dataset = DataSet(**(result["DataSet"])) | ||
|
||
if dataset.Arn is None: | ||
continue | ||
|
||
self._resources[dataset.Arn] = dataset | ||
|
||
json_dump_to_debug_file(results, "datasets.json") | ||
|
||
def _get_dashboard_detail(self): | ||
results = [] | ||
for dashboard_id in self._get_resource_ids(Endpoint.list_dashboards): | ||
result = self._client.describe_dashboard( | ||
AwsAccountId=self._aws_account_id, DashboardId=dashboard_id | ||
) | ||
results.append(result) | ||
dashboard = Dashboard(**(result["Dashboard"])) | ||
|
||
if dashboard.Arn is None: | ||
continue | ||
|
||
self._resources[dashboard.Arn] = dashboard | ||
|
||
json_dump_to_debug_file(results, "dashboards.json") | ||
|
||
def _get_data_source_detail(self): | ||
results = [] | ||
for data_source_id in self._get_resource_ids(Endpoint.list_data_sources): | ||
result = self._client.describe_data_source( | ||
AwsAccountId=self._aws_account_id, DataSourceId=data_source_id | ||
) | ||
results.append(result) | ||
|
||
data_source = DataSource(**(result["DataSource"])) | ||
|
||
if data_source.Arn is None: | ||
continue | ||
|
||
self._resources[data_source.Arn] = data_source | ||
|
||
json_dump_to_debug_file(results, "data_sources.json") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from pydantic.dataclasses import dataclass | ||
|
||
from metaphor.common.aws import AwsCredentials | ||
from metaphor.common.base_config import BaseConfig | ||
from metaphor.common.dataclass import ConnectorConfig | ||
|
||
|
||
@dataclass(config=ConnectorConfig) | ||
class QuickSightRunConfig(BaseConfig): | ||
aws: AwsCredentials | ||
|
||
aws_account_id: str |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
from typing import Optional | ||
|
||
from metaphor.common.snowflake import normalize_snowflake_account | ||
from metaphor.models.metadata_change_event import DataPlatform | ||
from metaphor.quick_sight.models import ( | ||
DataSource, | ||
DataSourceType, | ||
TypeDataSourceParameters, | ||
) | ||
|
||
DATA_SOURCE_PLATFORM_MAP = { | ||
DataSourceType.AURORA: DataPlatform.MYSQL, | ||
DataSourceType.AURORA_POSTGRESQL: DataPlatform.POSTGRESQL, | ||
DataSourceType.BIGQUERY: DataPlatform.BIGQUERY, | ||
DataSourceType.DATABRICKS: DataPlatform.UNITY_CATALOG, | ||
DataSourceType.MARIADB: DataPlatform.MYSQL, | ||
DataSourceType.MYSQL: DataPlatform.MYSQL, | ||
DataSourceType.POSTGRESQL: DataPlatform.POSTGRESQL, | ||
DataSourceType.REDSHIFT: DataPlatform.REDSHIFT, | ||
DataSourceType.ORACLE: DataPlatform.ORACLE, | ||
DataSourceType.SNOWFLAKE: DataPlatform.SNOWFLAKE, | ||
DataSourceType.SQLSERVER: DataPlatform.MSSQL, | ||
} | ||
|
||
|
||
def _get_database_from_parameters(parameters: TypeDataSourceParameters): | ||
if parameters.AuroraParameters: | ||
return parameters.AuroraParameters.Database | ||
|
||
if parameters.AuroraPostgreSqlParameters: | ||
return parameters.AuroraPostgreSqlParameters.Database | ||
|
||
if parameters.BigQueryParameters: | ||
return parameters.BigQueryParameters.ProjectId | ||
|
||
if parameters.MariaDbParameters: | ||
return parameters.MariaDbParameters.Database | ||
|
||
if parameters.MySqlParameters: | ||
return parameters.MySqlParameters.Database | ||
|
||
if parameters.OracleParameters: | ||
return parameters.OracleParameters.Database | ||
|
||
if parameters.PostgreSqlParameters: | ||
return parameters.PostgreSqlParameters.Database | ||
|
||
if parameters.RdsParameters: | ||
return parameters.RdsParameters.Database | ||
|
||
if parameters.RedshiftParameters: | ||
return parameters.RedshiftParameters.Database | ||
|
||
if parameters.SnowflakeParameters: | ||
return parameters.SnowflakeParameters.Database | ||
|
||
if parameters.SqlServerParameters: | ||
return parameters.SqlServerParameters.Database | ||
|
||
return None | ||
|
||
|
||
def get_database(data_source: DataSource) -> Optional[str]: | ||
""" | ||
Extract database from DataSource parameters | ||
""" | ||
if data_source.DataSourceParameters is None: | ||
return None | ||
|
||
parameters = data_source.DataSourceParameters | ||
|
||
return _get_database_from_parameters(parameters) | ||
|
||
|
||
def get_account(data_source: DataSource) -> Optional[str]: | ||
""" | ||
Extract account from DataSource parameters | ||
""" | ||
if data_source.DataSourceParameters is None: | ||
return None | ||
|
||
parameters = data_source.DataSourceParameters | ||
|
||
if parameters.AuroraParameters: | ||
return parameters.AuroraParameters.Host | ||
|
||
if parameters.MariaDbParameters: | ||
return parameters.MariaDbParameters.Host | ||
|
||
if parameters.MySqlParameters: | ||
return parameters.MySqlParameters.Host | ||
|
||
if parameters.OracleParameters: | ||
return parameters.OracleParameters.Host | ||
|
||
if parameters.SnowflakeParameters: | ||
return ( | ||
normalize_snowflake_account(parameters.SnowflakeParameters.Host) | ||
if parameters.SnowflakeParameters.Host | ||
else None | ||
) | ||
|
||
if parameters.SqlServerParameters: | ||
return parameters.SqlServerParameters.Host | ||
return None |
Oops, something went wrong.