Skip to content

Commit

Permalink
[MAINTENANCE] Column Descriptive Metrics: Default to UNKNOWN if type …
Browse files Browse the repository at this point in the history
…is not found (#8810)
  • Loading branch information
anthonyburdi authored Oct 6, 2023
1 parent 7eca4e9 commit 6607a02
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,12 @@ def _get_table_column_types(
aborted_metrics=aborted_metrics,
)
raw_column_types: list[dict[str, Any]] = value
# If type is not found, default to UNKNOWN
column_types_converted_to_str: list[dict[str, str]] = [
{"name": raw_column_type["name"], "type": str(raw_column_type["type"])}
{
"name": raw_column_type["name"],
"type": str(raw_column_type.get("type", "UNKNOWN")),
}
for raw_column_type in raw_column_types
]
return TableMetric[List[str]](
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,164 @@ def test_get_metrics_with_exception():
]


def test_get_metrics_with_column_type_missing():
    """Verify a column whose type is missing is reported as "UNKNOWN".

    Simulates a computed ``table.column_types`` metric in which one column's
    dict lacks a ``"type"`` key, alongside two aborted metrics
    (``table.row_count`` and ``column.min`` for ``col1``). The retriever must
    default the missing type to the string ``"UNKNOWN"`` while still
    surfacing a ``MetricException`` (with ``value=None``) for each aborted
    metric.
    """
    mock_context = Mock(spec=CloudDataContext)
    mock_validator = Mock(spec=Validator)
    mock_context.get_validator.return_value = mock_validator

    # Shared exception payload attached to every aborted metric below.
    exception_info = ExceptionInfo(
        exception_traceback="test exception traceback",
        exception_message="test exception message",
        raised_exception=True,
    )

    aborted_metrics = {
        ("table.row_count", (), ()): {
            "metric_configuration": {},  # Leaving out for brevity
            "num_failures": 3,
            "exception_info": {exception_info},
        },
        ("column.min", "column=col1", ()): {
            "metric_configuration": {},  # Leaving out for brevity
            "num_failures": 3,
            "exception_info": {exception_info},
        },
    }

    # Aborted metrics are deliberately absent from computed_metrics
    # (commented entries below) — they only appear in aborted_metrics.
    computed_metrics = {
        # ("table.row_count", (), ()): 2, # Error in table.row_count metric
        ("table.columns", (), ()): ["col1", "col2"],
        ("table.column_types", (), "include_nested=True"): [
            {"name": "col1", "type": "float"},
            {
                "name": "col2",
            },  # Missing type for col2
        ],
        # ("column.min", "column=col1", ()): 2.5, # Error in column.min metric for col1
        ("column.min", "column=col2", ()): 2.7,
        ("column.max", "column=col1", ()): 5.5,
        ("column.max", "column=col2", ()): 5.7,
        ("column.mean", "column=col1", ()): 2.5,
        ("column.mean", "column=col2", ()): 2.7,
        ("column.median", "column=col1", ()): 2.5,
        ("column.median", "column=col2", ()): 2.7,
        ("column_values.null.count", "column=col1", ()): 1,
        ("column_values.null.count", "column=col2", ()): 1,
    }
    mock_validator.compute_metrics_with_aborted_metrics.return_value = (
        computed_metrics,
        aborted_metrics,
    )
    mock_batch = Mock(spec=Batch)
    mock_batch.id = "batch_id"
    mock_validator.active_batch = mock_batch

    metric_retriever = ColumnDescriptiveMetricsMetricRetriever(context=mock_context)

    mock_batch_request = Mock(spec=BatchRequest)

    # Patch column-name resolution so the retriever sees both columns without
    # needing a real execution engine.
    with mock.patch(
        f"{ColumnDomainBuilder.__module__}.{ColumnDomainBuilder.__name__}.get_effective_column_names",
        return_value=["col1", "col2"],
    ):
        metrics = metric_retriever.get_metrics(batch_request=mock_batch_request)

    # Aborted metrics surface as value=None plus a MetricException; the
    # column with the missing type is reported with type "UNKNOWN".
    assert metrics == [
        TableMetric[int](
            batch_id="batch_id",
            metric_name="table.row_count",
            value=None,
            exception=MetricException(type="Unknown", message="test exception message"),
        ),
        TableMetric[List[str]](
            batch_id="batch_id",
            metric_name="table.columns",
            value=["col1", "col2"],
            exception=None,
        ),
        TableMetric[List[str]](
            batch_id="batch_id",
            metric_name="table.column_types",
            value=[
                {"name": "col1", "type": "float"},
                {"name": "col2", "type": "UNKNOWN"},
            ],
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.min",
            column="col1",
            value=None,
            exception=MetricException(type="Unknown", message="test exception message"),
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.min",
            column="col2",
            value=2.7,
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.max",
            column="col1",
            value=5.5,
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.max",
            column="col2",
            value=5.7,
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.mean",
            value=2.5,
            exception=None,
            column="col1",
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.mean",
            value=2.7,
            exception=None,
            column="col2",
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.median",
            value=2.5,
            exception=None,
            column="col1",
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.median",
            value=2.7,
            exception=None,
            column="col2",
        ),
        ColumnMetric[int](
            batch_id="batch_id",
            metric_name="column_values.null.count",
            value=1,
            exception=None,
            column="col1",
        ),
        ColumnMetric[int](
            batch_id="batch_id",
            metric_name="column_values.null.count",
            value=1,
            exception=None,
            column="col2",
        ),
    ]


def test_get_metrics_only_gets_a_validator_once():
mock_context = Mock(spec=CloudDataContext)
mock_validator = Mock(spec=Validator)
Expand Down

0 comments on commit 6607a02

Please sign in to comment.