Skip to content

Commit

Permalink
[BUGFIX] Column Descriptive Metrics: Skip finding semantic type info …
Browse files Browse the repository at this point in the history
…for UNKNOWN columns (#8815)
  • Loading branch information
anthonyburdi authored Oct 10, 2023
1 parent 6607a02 commit ce09dbc
Showing 1 changed file with 21 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,15 @@ def get_validator(self, batch_request: BatchRequest) -> Validator:
def get_metrics(self, batch_request: BatchRequest) -> Sequence[Metric]:
table_metrics = self._get_table_metrics(batch_request)

# We need to skip columns that do not report a type, because the metric computation
# to determine semantic type will fail.
table_column_types = list(
filter(lambda m: m.metric_name == "table.column_types", table_metrics)
)[0]
exclude_column_names = self._get_columns_to_exclude(table_column_types)

numeric_column_names = self._get_numeric_column_names(
batch_request=batch_request
batch_request=batch_request, exclude_column_names=exclude_column_names
)
numeric_column_metrics = self._get_numeric_column_metrics(
batch_request=batch_request, column_list=numeric_column_names
Expand Down Expand Up @@ -151,6 +158,13 @@ def _get_table_column_types(
exception=exception,
)

def _get_columns_to_exclude(self, table_column_types: Metric) -> List[str]:
columns_to_skip: List[str] = []
for column_type in table_column_types.value:
if column_type["type"] == "UNKNOWN":
columns_to_skip.append(column_type["name"])
return columns_to_skip

def _get_numeric_column_metrics(
self, batch_request: BatchRequest, column_list: List[str]
) -> Sequence[Metric]:
Expand Down Expand Up @@ -240,11 +254,16 @@ def _get_all_column_names(self, metrics: Sequence[Metric]) -> List[str]:
column_list = metric.value
return column_list

def _get_numeric_column_names(self, batch_request: BatchRequest) -> list[str]:
def _get_numeric_column_names(
self,
batch_request: BatchRequest,
exclude_column_names: List[str],
) -> list[str]:
"""Get the names of all numeric columns in the batch."""
validator = self.get_validator(batch_request=batch_request)
domain_builder = ColumnDomainBuilder(
include_semantic_types=[SemanticDomainTypes.NUMERIC],
exclude_column_names=exclude_column_names,
)
assert isinstance(
validator.active_batch, Batch
Expand Down

0 comments on commit ce09dbc

Please sign in to comment.