Skip to content

Commit

Permalink
Support column descriptions for custom.governance extractor (#656)
Browse files Browse the repository at this point in the history
* Parse column descriptions in custom.governance extractor

* update models version

* change back poetry lock

* refactor

* stick with old poetry.lock

* refactor

* change back pymssql version
  • Loading branch information
usefulalgorithm authored Nov 3, 2023
1 parent 430adec commit 083d185
Show file tree
Hide file tree
Showing 8 changed files with 200 additions and 62 deletions.
7 changes: 7 additions & 0 deletions metaphor/custom/governance/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ datasets:
      - description: <description_text>
        email: <author_email>
      ...
    column_descriptions:
      - column_name: <column_name>
        descriptions:
          - description: <description_text>
            email: <author_email>
          ...
      ...
  ...
output:
  file:
Expand Down
85 changes: 84 additions & 1 deletion metaphor/custom/governance/config.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
from dataclasses import field as dataclass_field
from typing import List
from typing import List, Optional

from pydantic.dataclasses import dataclass

from metaphor.common.base_extractor import BaseConfig
from metaphor.common.dataclass import ConnectorConfig
from metaphor.common.entity_id import to_person_entity_id
from metaphor.common.models import DeserializableDatasetLogicalID
from metaphor.models.metadata_change_event import (
AssetDescription,
ColumnDescriptionAssignment,
ColumnTagAssignment,
DescriptionAssignment,
)
from metaphor.models.metadata_change_event import (
Ownership as OwnershipAssignmentOwnership,
)
from metaphor.models.metadata_change_event import OwnershipAssignment, TagAssignment


@dataclass(config=ConnectorConfig)
Expand Down Expand Up @@ -34,6 +45,27 @@ class Description:
# The author's email address
email: str

def to_asset_description(self) -> AssetDescription:
    """Convert this configured description into an ``AssetDescription``.

    The author is the string form of the value ``to_person_entity_id``
    returns for ``self.email``; the description text is passed through
    unchanged.
    """
    author_id = str(to_person_entity_id(self.email))
    return AssetDescription(description=self.description, author=author_id)


@dataclass(config=ConnectorConfig)
class ColumnDescriptions:
    # Groups the descriptions configured for one named column.

    # The column's descriptions
    descriptions: List[Description]

    # Name of the column
    column_name: str

    def to_column_asset_description(self) -> ColumnDescriptionAssignment:
        """Build the ``ColumnDescriptionAssignment`` for this column.

        Each configured description is converted with
        ``Description.to_asset_description`` and the results are attached
        to the column identified by ``column_name``.
        """
        converted = []
        for entry in self.descriptions:
            converted.append(entry.to_asset_description())

        return ColumnDescriptionAssignment(
            column_name=self.column_name,
            asset_descriptions=converted,
        )


@dataclass(config=ConnectorConfig)
class DatasetGovernance:
Expand All @@ -47,6 +79,57 @@ class DatasetGovernance:

descriptions: List[Description] = dataclass_field(default_factory=lambda: [])

column_descriptions: List[ColumnDescriptions] = dataclass_field(
default_factory=lambda: []
)

def to_ownership_assignment(self) -> Optional[OwnershipAssignment]:
    """Build the ownership assignment for this dataset.

    Returns ``None`` when ``self.ownerships`` is empty; otherwise returns
    an ``OwnershipAssignment`` holding one ownership entry per configured
    owner, in the configured order.
    """
    if self.ownerships:
        owners = []
        for entry in self.ownerships:
            owners.append(
                OwnershipAssignmentOwnership(
                    contact_designation_name=entry.type,
                    person=str(to_person_entity_id(entry.email)),
                )
            )
        return OwnershipAssignment(ownerships=owners)

    return None

def to_tag_assignment(self) -> Optional[TagAssignment]:
    """Build the tag assignment for this dataset.

    Returns ``None`` when neither dataset-level tags nor column tags are
    configured. Otherwise returns a ``TagAssignment`` on which only the
    configured parts are set: ``tag_names`` for dataset-level tags and
    ``column_tag_assignments`` for per-column tags.
    """
    if not (self.tags or self.column_tags):
        return None

    assignment = TagAssignment()

    if self.tags:
        assignment.tag_names = self.tags

    if self.column_tags:
        per_column = []
        for entry in self.column_tags:
            per_column.append(
                ColumnTagAssignment(column_name=entry.column, tag_names=entry.tags)
            )
        assignment.column_tag_assignments = per_column

    return assignment

def to_description_assignment(self) -> Optional[DescriptionAssignment]:
    """Build the description assignment for this dataset.

    Returns ``None`` when neither dataset-level descriptions nor column
    descriptions are configured. Otherwise returns a
    ``DescriptionAssignment`` on which only the configured parts are set:
    ``asset_descriptions`` for the dataset itself and
    ``column_description_assignments`` for its columns.
    """
    if not (self.descriptions or self.column_descriptions):
        return None

    assignment = DescriptionAssignment()

    if self.descriptions:
        dataset_level = []
        for entry in self.descriptions:
            dataset_level.append(entry.to_asset_description())
        assignment.asset_descriptions = dataset_level

    if self.column_descriptions:
        column_level = []
        for entry in self.column_descriptions:
            column_level.append(entry.to_column_asset_description())
        assignment.column_description_assignments = column_level

    return assignment


@dataclass(config=ConnectorConfig)
class CustomGovernanceConfig(BaseConfig):
Expand Down
55 changes: 4 additions & 51 deletions metaphor/custom/governance/extractor.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,9 @@
from typing import List

from metaphor.common.base_extractor import BaseExtractor
from metaphor.common.entity_id import to_person_entity_id
from metaphor.common.logger import get_logger
from metaphor.custom.governance.config import CustomGovernanceConfig
from metaphor.models.metadata_change_event import (
AssetDescription,
ColumnTagAssignment,
Dataset,
DescriptionAssignment,
MetadataChangeEvent,
Ownership,
OwnershipAssignment,
TagAssignment,
)
from metaphor.models.metadata_change_event import Dataset, MetadataChangeEvent

logger = get_logger()

Expand All @@ -38,45 +28,8 @@ async def extract(self) -> List[MetadataChangeEvent]:
for governance in self._datasets:
dataset = Dataset(logical_id=governance.id.to_logical_id())
datasets.append(dataset)

if len(governance.ownerships) > 0:
ownerships = [
Ownership(
contact_designation_name=o.type,
person=str(to_person_entity_id(o.email)),
)
for o in governance.ownerships
]

dataset.ownership_assignment = OwnershipAssignment(
ownerships=ownerships
)

if len(governance.tags) > 0:
dataset.tag_assignment = TagAssignment(tag_names=governance.tags)

if len(governance.column_tags) > 0:
if dataset.tag_assignment is None:
dataset.tag_assignment = TagAssignment()

dataset.tag_assignment.column_tag_assignments = [
ColumnTagAssignment(
column_name=column_tag.column, tag_names=column_tag.tags
)
for column_tag in governance.column_tags
]

if len(governance.descriptions) > 0:
asset_descriptions = [
AssetDescription(
description=d.description,
author=str(to_person_entity_id(d.email)),
)
for d in governance.descriptions
]

dataset.description_assignment = DescriptionAssignment(
asset_descriptions=asset_descriptions
)
dataset.ownership_assignment = governance.to_ownership_assignment()
dataset.description_assignment = governance.to_description_assignment()
dataset.tag_assignment = governance.to_tag_assignment()

return datasets
Loading

0 comments on commit 083d185

Please sign in to comment.