Skip to content

Commit

Permalink
fix: eval dataframe should allow a column to be all null if type doesn't match expected (#5495)
Browse files Browse the repository at this point in the history
  • Loading branch information
RogerHYang authored Nov 22, 2024
1 parent de458dc commit 69b6594
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 3 deletions.
7 changes: 4 additions & 3 deletions src/phoenix/trace/span_evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,15 @@ class NeedsResultColumns(ABC):
)

@classmethod
def is_valid_result_columns(cls, dtypes: "pd.Series[Any]") -> bool:
def is_valid_result_columns(cls, df: pd.DataFrame) -> bool:
dtypes = df.dtypes
names = cls.result_column_names.keys()
intersection = dtypes.index.intersection(names) # type: ignore
if not len(intersection):
return False
for name in intersection:
check_type = cls.result_column_names[name]
if not check_type(dtypes[name]):
if not check_type(dtypes[name]) and not df.loc[:, name].isna().all():
return False
return True

Expand Down Expand Up @@ -138,7 +139,7 @@ def _clean_dataframe(self, dataframe: pd.DataFrame) -> pd.DataFrame:
)

# Validate that the dataframe contains result columns of appropriate types.
if not self.is_valid_result_columns(dataframe.dtypes):
if not self.is_valid_result_columns(dataframe):
raise ValueError(
f"The dataframe must contain one of these columns with appropriate "
f"value types: {self.result_column_names.keys()} "
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/trace/test_span_evaluations.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pandas as pd
import pyarrow
import pytest
from pandas.core.dtypes.common import is_numeric_dtype
from pandas.testing import assert_frame_equal
from pyarrow import parquet

Expand Down Expand Up @@ -42,6 +43,20 @@ def test_span_evaluations_construction() -> None:
assert "score" in eval_ds.dataframe.columns


def test_span_evaluations_construction_allows_all_null_column() -> None:
    """Check that ``SpanEvaluations`` can be built when ``score`` is entirely null.

    The test passes as long as the constructor call at the end does not raise.
    """
    record_count = 5
    frame = pd.DataFrame(
        {
            "context.span_id": [f"span_{position}" for position in range(record_count)],
            "label": [str(position) for position in range(record_count)],
            "score": [None] * record_count,
        }
    ).set_index("context.span_id")
    # Guard the premise of the test: the all-None column must not already be a
    # numeric dtype, otherwise the construction below would not exercise the
    # all-null case.
    assert not is_numeric_dtype(frame.loc[:, "score"])
    SpanEvaluations(eval_name="my_eval", dataframe=frame)


def power_set(s: list[tuple[str, Any]]) -> Iterator[dict[str, Any]]:
    """Yield every subset of the key/value pairs in *s* as a dict.

    Subsets are produced in order of increasing size, starting with the empty
    dict, and within one size in the order ``itertools.combinations`` emits
    them.

    Args:
        s: Key/value pairs to choose from. Any iterable of pairs is accepted;
            it is materialised once so one-shot iterators work as well.

    Yields:
        One dict per subset of the pairs, ``2 ** n`` dicts for ``n`` pairs.
    """
    # Snapshot the input: ``len`` needs a sized object and ``combinations`` is
    # invoked once per subset size, so the pairs must be re-iterable.
    pairs = tuple(s)
    subsets = chain.from_iterable(
        combinations(pairs, size) for size in range(len(pairs) + 1)
    )
    for subset in subsets:
        yield dict(subset)
Expand Down

0 comments on commit 69b6594

Please sign in to comment.