Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Redshift profiler missing tables [sc-29514] #1017

Merged
merged 2 commits on Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 31 additions & 11 deletions metaphor/postgresql/profile/extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import asyncio
import traceback
from typing import Collection, Iterable, List
from typing import Collection, List

try:
import asyncpg
Expand Down Expand Up @@ -61,26 +61,40 @@

await asyncio.gather(*coroutines)

return self._datasets.values()
return [
dataset
for dataset in self._datasets.values()
if self._trim_fields_and_check_empty_dataset(dataset)
]

async def _profile_database(self, database: str) -> None:
pool = await self._create_connection_pool()

async with pool.acquire() as conn:
await self._fetch_tables(conn, database)
datasets = await self._fetch_columns(conn, database)
logger.info(f"Include {len(datasets)} tables from {database}")
logger.info(f"Include {len(datasets)} datasets from {database}")

Check warning on line 76 in metaphor/postgresql/profile/extractor.py

View check run for this annotation

Codecov / codecov/patch

metaphor/postgresql/profile/extractor.py#L76

Added line #L76 was not covered by tests

tasks = [
self._profile_dataset(pool, dataset)
for dataset in datasets
if dataset.schema.sql_schema.materialization != MaterializationType.VIEW
or not self._include_views
if self._filter_dataset_type(dataset)
]
await asyncio.gather(*tasks)
await pool.close()

self._trim_fields(datasets)
def _filter_dataset_type(self, dataset: Dataset) -> bool:
"""
Filter datasets by type based on the config; "External", "Stream" and "Snapshot" datasets are never profiled.
"""
dataset_type = dataset.schema.sql_schema.materialization
if self._include_views:
return dataset_type in {

Check warning on line 92 in metaphor/postgresql/profile/extractor.py

View check run for this annotation

Codecov / codecov/patch

metaphor/postgresql/profile/extractor.py#L90-L92

Added lines #L90 - L92 were not covered by tests
MaterializationType.TABLE,
MaterializationType.VIEW,
MaterializationType.MATERIALIZED_VIEW,
}
return dataset_type == MaterializationType.TABLE

Check warning on line 97 in metaphor/postgresql/profile/extractor.py

View check run for this annotation

Codecov / codecov/patch

metaphor/postgresql/profile/extractor.py#L97

Added line #L97 was not covered by tests

async def _profile_dataset(self, pool: asyncpg.Pool, dataset: Dataset) -> None:
async with pool.acquire() as conn:
Expand Down Expand Up @@ -224,8 +238,14 @@
)

@staticmethod
def _trim_fields(datasets: Iterable[Dataset]) -> None:
"""Drop temporary fields"""
for dataset in datasets:
dataset.schema = None
dataset.statistics = None
def _trim_fields_and_check_empty_dataset(dataset: Dataset) -> bool:
"""Drop temporary fields and check if the dataset field statistic is empty"""
if (

Check warning on line 243 in metaphor/postgresql/profile/extractor.py

View check run for this annotation

Codecov / codecov/patch

metaphor/postgresql/profile/extractor.py#L243

Added line #L243 was not covered by tests
not dataset.field_statistics
or not dataset.field_statistics.field_statistics
):
return False

Check warning on line 247 in metaphor/postgresql/profile/extractor.py

View check run for this annotation

Codecov / codecov/patch

metaphor/postgresql/profile/extractor.py#L247

Added line #L247 was not covered by tests

dataset.schema = None
dataset.statistics = None
return True

Check warning on line 251 in metaphor/postgresql/profile/extractor.py

View check run for this annotation

Codecov / codecov/patch

metaphor/postgresql/profile/extractor.py#L249-L251

Added lines #L249 - L251 were not covered by tests
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.134"
version = "0.14.135"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
Loading