Skip to content

Commit

Permalink
[sc-29547] Deprecate BQ lineage crawler and a lot of refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
usefulalgorithm committed Nov 7, 2024
1 parent a980be2 commit 121105b
Show file tree
Hide file tree
Showing 15 changed files with 505 additions and 465 deletions.
23 changes: 23 additions & 0 deletions metaphor/bigquery/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,24 @@ class BigQueryQueryLogConfig:
max_requests_per_minute: int = DEFAULT_MAX_REQUESTS_PER_MINUTE


@dataclass(config=ConnectorConfig)
class BigQueryLineageConfig:
    # Options for the lineage extraction; every field has a default, so the
    # whole section may be omitted from the run config.

    # Whether to parse view queries to find the upstream of each view
    # (default: True)
    enable_view_lineage: bool = True

    # Whether to parse the audit log to find table lineage information
    # (default: True)
    enable_lineage_from_log: bool = True

    # Number of days back in the query log to process (default: 7)
    lookback_days: int = 7

    # Whether to include self loops in lineage (default: True)
    include_self_lineage: bool = True

    # Number of access logs fetched in a batch (default: 1000)
    batch_size: int = 1000


@dataclass(config=ConnectorConfig)
class BigQueryRunConfig(BaseConfig):
# List of project IDs to extract metadata from
Expand Down Expand Up @@ -97,6 +115,11 @@ class BigQueryRunConfig(BaseConfig):
default_factory=lambda: BigQueryQueryLogConfig()
)

# configs for lineage information
lineage: BigQueryLineageConfig = field(
default_factory=lambda: BigQueryLineageConfig()
)

@model_validator(mode="after")
def have_key_path_or_credentials(self) -> "BigQueryRunConfig":
must_set_exactly_one(self.__dict__, ["key_path", "credentials"])
Expand Down
Loading

0 comments on commit 121105b

Please sign in to comment.