Skip to content

Commit

Permalink
Fix tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
svdimchenko committed Jan 14, 2025
1 parent 587f999 commit 5605687
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 39 deletions.
2 changes: 1 addition & 1 deletion metadata-ingestion/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ disallow_untyped_defs = yes

[tool:pytest]
asyncio_mode = auto
addopts = --cov=src --cov-report= --cov-config setup.cfg --strict-markers -p no:faker
; addopts = --cov=src --cov-report= --cov-config setup.cfg --strict-markers -p no:faker
markers =
slow: marks tests that are slow to run, including all docker-based tests (deselect with '-m not slow')
integration: marks all integration tests, across all batches (deselect with '-m "not integration"')
Expand Down
15 changes: 5 additions & 10 deletions metadata-ingestion/src/datahub/ingestion/source/aws/glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from dataclasses import dataclass, field as dataclass_field
from functools import lru_cache
from typing import (
TYPE_CHECKING,
Any,
DefaultDict,
Dict,
Expand All @@ -22,6 +21,7 @@

import botocore.exceptions
import yaml
from mypy_boto3_glue.type_defs import DatabasePaginatorTypeDef, TablePaginatorTypeDef
from pydantic import validator
from pydantic.fields import Field

Expand Down Expand Up @@ -115,12 +115,6 @@
from datahub.utilities.delta import delta_type_to_hive_type
from datahub.utilities.hive_schema_to_avro import get_schema_fields_for_hive_column

if TYPE_CHECKING:
from mypy_boto3_glue.type_defs import (
DatabasePaginatorTypeDef,
TablePaginatorTypeDef,
)

logger = logging.getLogger(__name__)

DEFAULT_PLATFORM = "glue"
Expand Down Expand Up @@ -234,9 +228,10 @@ def platform_validator(cls, v: str) -> str:
f"'platform' can only take following values: {VALID_PLATFORMS}"
)

def __post_init__(self) -> None:
current_account_id = self.sts_client.get_caller_identity().get("Account")
def __init__(self, **data: Any):
super().__init__(**data)
if self.catalog_id:
current_account_id = self.sts_client.get_caller_identity().get("Account")
if self.catalog_id == current_account_id:
self.catalog_name = DEFAULT_CATALOG_NAME

Check warning on line 236 in metadata-ingestion/src/datahub/ingestion/source/aws/glue.py

View check run for this annotation

Codecov / codecov/patch

metadata-ingestion/src/datahub/ingestion/source/aws/glue.py#L235-L236

Added lines #L235 - L236 were not covered by tests
else:
Expand Down Expand Up @@ -1142,7 +1137,7 @@ def _gen_table_wu(self, table: TablePaginatorTypeDef) -> Iterable[MetadataWorkUn
platform_instance=self.source_config.platform_instance,
)

mce = self._extract_record(dataset_urn, table, full_table_name)
mce = self._extract_record(dataset_urn, dict(table), full_table_name)
yield MetadataWorkUnit(full_table_name, mce=mce)

# We also want to assign "table" subType to the dataset representing glue table - unfortunately it is not
Expand Down
28 changes: 14 additions & 14 deletions metadata-ingestion/tests/unit/glue/glue_mces_golden.json
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,flights-database.avro,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.flights-database.avro,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.Status": {
Expand Down Expand Up @@ -205,7 +205,7 @@
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights-database.avro",
"schemaName": "awsdatacatalog.flights-database.avro",
"platform": "urn:li:dataPlatform:glue",
"version": 0,
"created": {
Expand Down Expand Up @@ -370,7 +370,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,flights-database.avro,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.flights-database.avro,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
Expand All @@ -383,7 +383,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,flights-database.avro,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.flights-database.avro,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
Expand All @@ -395,7 +395,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,test-database.test_jsons_markers,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.test-database.test_jsons_markers,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.Status": {
Expand Down Expand Up @@ -432,7 +432,7 @@
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "test-database.test_jsons_markers",
"schemaName": "awsdatacatalog.test-database.test_jsons_markers",
"platform": "urn:li:dataPlatform:glue",
"version": 0,
"created": {
Expand Down Expand Up @@ -555,7 +555,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,test-database.test_jsons_markers,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.test-database.test_jsons_markers,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
Expand All @@ -568,7 +568,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,test-database.test_jsons_markers,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.test-database.test_jsons_markers,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
Expand All @@ -580,7 +580,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,test-database.test_parquet,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.test-database.test_parquet,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.Status": {
Expand Down Expand Up @@ -617,7 +617,7 @@
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "test-database.test_parquet",
"schemaName": "awsdatacatalog.test-database.test_parquet",
"platform": "urn:li:dataPlatform:glue",
"version": 0,
"created": {
Expand Down Expand Up @@ -741,7 +741,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,test-database.test_parquet,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.test-database.test_parquet,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
Expand All @@ -754,7 +754,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,test-database.test_parquet,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.test-database.test_parquet,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
Expand Down Expand Up @@ -896,7 +896,7 @@
{
"com.linkedin.pegasus2avro.datajob.DataJobInputOutput": {
"inputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:glue,flights-database.avro,PROD)"
"urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.flights-database.avro,PROD)"
],
"outputDatasets": [],
"inputDatajobs": []
Expand Down Expand Up @@ -1085,7 +1085,7 @@
{
"com.linkedin.pegasus2avro.datajob.DataJobInputOutput": {
"inputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:glue,test-database.test_parquet,PROD)"
"urn:li:dataset:(urn:li:dataPlatform:glue,awsdatacatalog.test-database.test_parquet,PROD)"
],
"outputDatasets": [],
"inputDatajobs": []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.flights-database.avro,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.flights-database.avro,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.Status": {
Expand Down Expand Up @@ -211,7 +211,7 @@
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "flights-database.avro",
"schemaName": "awsdatacatalog.flights-database.avro",
"platform": "urn:li:dataPlatform:glue",
"version": 0,
"created": {
Expand Down Expand Up @@ -377,7 +377,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.flights-database.avro,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.flights-database.avro,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
Expand All @@ -390,7 +390,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.flights-database.avro,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.flights-database.avro,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
Expand All @@ -402,7 +402,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.test-database.test_jsons_markers,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.test-database.test_jsons_markers,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.Status": {
Expand Down Expand Up @@ -439,7 +439,7 @@
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "test-database.test_jsons_markers",
"schemaName": "awsdatacatalog.test-database.test_jsons_markers",
"platform": "urn:li:dataPlatform:glue",
"version": 0,
"created": {
Expand Down Expand Up @@ -563,7 +563,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.test-database.test_jsons_markers,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.test-database.test_jsons_markers,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
Expand All @@ -576,7 +576,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.test-database.test_jsons_markers,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.test-database.test_jsons_markers,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
Expand All @@ -588,7 +588,7 @@
{
"proposedSnapshot": {
"com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": {
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.test-database.test_parquet,PROD)",
"urn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.test-database.test_parquet,PROD)",
"aspects": [
{
"com.linkedin.pegasus2avro.common.Status": {
Expand Down Expand Up @@ -625,7 +625,7 @@
},
{
"com.linkedin.pegasus2avro.schema.SchemaMetadata": {
"schemaName": "test-database.test_parquet",
"schemaName": "awsdatacatalog.test-database.test_parquet",
"platform": "urn:li:dataPlatform:glue",
"version": 0,
"created": {
Expand Down Expand Up @@ -750,7 +750,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.test-database.test_parquet,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.test-database.test_parquet,PROD)",
"changeType": "UPSERT",
"aspectName": "subTypes",
"aspect": {
Expand All @@ -763,7 +763,7 @@
},
{
"entityType": "dataset",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.test-database.test_parquet,PROD)",
"entityUrn": "urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.test-database.test_parquet,PROD)",
"changeType": "UPSERT",
"aspectName": "container",
"aspect": {
Expand Down Expand Up @@ -905,7 +905,7 @@
{
"com.linkedin.pegasus2avro.datajob.DataJobInputOutput": {
"inputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.flights-database.avro,PROD)"
"urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.flights-database.avro,PROD)"
],
"outputDatasets": [],
"inputDatajobs": []
Expand Down Expand Up @@ -1094,7 +1094,7 @@
{
"com.linkedin.pegasus2avro.datajob.DataJobInputOutput": {
"inputDatasets": [
"urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.test-database.test_parquet,PROD)"
"urn:li:dataset:(urn:li:dataPlatform:glue,some_instance_name.awsdatacatalog.test-database.test_parquet,PROD)"
],
"outputDatasets": [],
"inputDatajobs": []
Expand Down
3 changes: 3 additions & 0 deletions metadata-ingestion/tests/unit/glue/test_glue_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest
from botocore.stub import Stubber
from freezegun import freeze_time
from moto import mock_athena, mock_sts

import datahub.metadata.schema_classes as models
from datahub.ingestion.api.common import PipelineContext
Expand Down Expand Up @@ -256,6 +257,8 @@ def test_glue_ingest(
)


@mock_athena
@mock_sts
def test_platform_config():
source = GlueSource(
ctx=PipelineContext(run_id="glue-source-test"),
Expand Down

0 comments on commit 5605687

Please sign in to comment.