Skip to content

Commit

Permalink
[MAINTENANCE] Column Descriptive Metrics: Default to UNKNOWN if type …
Browse files Browse the repository at this point in the history
…is not found (#8810)
  • Loading branch information
anthonyburdi authored Oct 6, 2023
1 parent 7eca4e9 commit 6607a02
Show file tree
Hide file tree
Showing 2 changed files with 163 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,12 @@ def _get_table_column_types(
aborted_metrics=aborted_metrics,
)
raw_column_types: list[dict[str, Any]] = value
# If type is not found, default to UNKNOWN
column_types_converted_to_str: list[dict[str, str]] = [
{"name": raw_column_type["name"], "type": str(raw_column_type["type"])}
{
"name": raw_column_type["name"],
"type": str(raw_column_type.get("type", "UNKNOWN")),
}
for raw_column_type in raw_column_types
]
return TableMetric[List[str]](
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,164 @@ def test_get_metrics_with_exception():
]


def test_get_metrics_with_column_type_missing():
    """Verify a column whose type is missing is reported as "UNKNOWN".

    Simulates a computed ``table.column_types`` metric in which one column's
    dict lacks a ``"type"`` key, alongside two aborted metrics
    (``table.row_count`` and ``column.min`` for ``col1``). The retriever must
    default the missing type to the string ``"UNKNOWN"`` while still
    surfacing a ``MetricException`` (with ``value=None``) for each aborted
    metric.
    """
    mock_context = Mock(spec=CloudDataContext)
    mock_validator = Mock(spec=Validator)
    mock_context.get_validator.return_value = mock_validator

    # Shared exception payload attached to every aborted metric below.
    exception_info = ExceptionInfo(
        exception_traceback="test exception traceback",
        exception_message="test exception message",
        raised_exception=True,
    )

    aborted_metrics = {
        ("table.row_count", (), ()): {
            "metric_configuration": {},  # Leaving out for brevity
            "num_failures": 3,
            "exception_info": {exception_info},
        },
        ("column.min", "column=col1", ()): {
            "metric_configuration": {},  # Leaving out for brevity
            "num_failures": 3,
            "exception_info": {exception_info},
        },
    }

    # Aborted metrics are deliberately absent from computed_metrics
    # (commented entries below) — they only appear in aborted_metrics.
    computed_metrics = {
        # ("table.row_count", (), ()): 2, # Error in table.row_count metric
        ("table.columns", (), ()): ["col1", "col2"],
        ("table.column_types", (), "include_nested=True"): [
            {"name": "col1", "type": "float"},
            {
                "name": "col2",
            },  # Missing type for col2
        ],
        # ("column.min", "column=col1", ()): 2.5, # Error in column.min metric for col1
        ("column.min", "column=col2", ()): 2.7,
        ("column.max", "column=col1", ()): 5.5,
        ("column.max", "column=col2", ()): 5.7,
        ("column.mean", "column=col1", ()): 2.5,
        ("column.mean", "column=col2", ()): 2.7,
        ("column.median", "column=col1", ()): 2.5,
        ("column.median", "column=col2", ()): 2.7,
        ("column_values.null.count", "column=col1", ()): 1,
        ("column_values.null.count", "column=col2", ()): 1,
    }
    mock_validator.compute_metrics_with_aborted_metrics.return_value = (
        computed_metrics,
        aborted_metrics,
    )
    mock_batch = Mock(spec=Batch)
    mock_batch.id = "batch_id"
    mock_validator.active_batch = mock_batch

    metric_retriever = ColumnDescriptiveMetricsMetricRetriever(context=mock_context)

    mock_batch_request = Mock(spec=BatchRequest)

    # Patch column-name resolution so the retriever sees both columns without
    # needing a real execution engine.
    with mock.patch(
        f"{ColumnDomainBuilder.__module__}.{ColumnDomainBuilder.__name__}.get_effective_column_names",
        return_value=["col1", "col2"],
    ):
        metrics = metric_retriever.get_metrics(batch_request=mock_batch_request)

    # Aborted metrics surface as value=None plus a MetricException; the
    # column with the missing type is reported with type "UNKNOWN".
    assert metrics == [
        TableMetric[int](
            batch_id="batch_id",
            metric_name="table.row_count",
            value=None,
            exception=MetricException(type="Unknown", message="test exception message"),
        ),
        TableMetric[List[str]](
            batch_id="batch_id",
            metric_name="table.columns",
            value=["col1", "col2"],
            exception=None,
        ),
        TableMetric[List[str]](
            batch_id="batch_id",
            metric_name="table.column_types",
            value=[
                {"name": "col1", "type": "float"},
                {"name": "col2", "type": "UNKNOWN"},
            ],
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.min",
            column="col1",
            value=None,
            exception=MetricException(type="Unknown", message="test exception message"),
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.min",
            column="col2",
            value=2.7,
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.max",
            column="col1",
            value=5.5,
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.max",
            column="col2",
            value=5.7,
            exception=None,
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.mean",
            value=2.5,
            exception=None,
            column="col1",
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.mean",
            value=2.7,
            exception=None,
            column="col2",
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.median",
            value=2.5,
            exception=None,
            column="col1",
        ),
        ColumnMetric[float](
            batch_id="batch_id",
            metric_name="column.median",
            value=2.7,
            exception=None,
            column="col2",
        ),
        ColumnMetric[int](
            batch_id="batch_id",
            metric_name="column_values.null.count",
            value=1,
            exception=None,
            column="col1",
        ),
        ColumnMetric[int](
            batch_id="batch_id",
            metric_name="column_values.null.count",
            value=1,
            exception=None,
            column="col2",
        ),
    ]


def test_get_metrics_only_gets_a_validator_once():
mock_context = Mock(spec=CloudDataContext)
mock_validator = Mock(spec=Validator)
Expand Down

0 comments on commit 6607a02

Please sign in to comment.