Skip to content

Commit

Permalink
address comments
Browse files Browse the repository at this point in the history
Signed-off-by: Naren Krishna <[email protected]>
  • Loading branch information
sfc-gh-nkrishna committed Aug 29, 2024
1 parent bdca479 commit a41dfed
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 39 deletions.
19 changes: 19 additions & 0 deletions src/snowflake/snowpark/modin/plugin/_internal/groupby_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,25 @@
]

NO_GROUPKEY_ERROR = ValueError("No group keys passed!")
GROUPBY_AGG_SAME_INPUT_AND_OUTPUT_DATA_TYPES = [
"min",
"max",
"sum",
"mean",
"median",
"std",
"first",
"last",
]
GROUPBY_AGG_DIFFERENT_INPUT_AND_OUTPUT_DATA_TYPES = [
"any",
"all",
"count",
"idxmax",
"idxmin",
"size",
"nunique",
]


def is_groupby_value_label_like(val: Any) -> bool:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@
)
from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame
from snowflake.snowpark.modin.plugin._internal.groupby_utils import (
GROUPBY_AGG_DIFFERENT_INPUT_AND_OUTPUT_DATA_TYPES,
GROUPBY_AGG_SAME_INPUT_AND_OUTPUT_DATA_TYPES,
check_is_groupby_supported_by_snowflake,
extract_groupby_column_pandas_labels,
get_frame_with_groupby_columns_as_index,
Expand Down Expand Up @@ -3426,30 +3428,13 @@ def convert_func_to_agg_func_info(
new_data_column_quoted_identifiers.append(
col_agg_op.agg_snowflake_quoted_identifier
)
if agg_func in (
"min",
"max",
"sum",
"mean",
"median",
"std",
"first",
"last",
):
if agg_func in GROUPBY_AGG_SAME_INPUT_AND_OUTPUT_DATA_TYPES:
new_data_column_snowpark_pandas_types.append(
col_agg_op.data_type
if isinstance(col_agg_op.data_type, SnowparkPandasType)
else None
)
elif agg_func in (
"any",
"all",
"count",
"idxmax",
"idxmin",
"size",
"nunique",
):
elif agg_func in GROUPBY_AGG_DIFFERENT_INPUT_AND_OUTPUT_DATA_TYPES:
# In the case where the aggregation overrides the type of the output data column
# (e.g. any always returns boolean data columns), set the output Snowpark pandas type to None
new_data_column_snowpark_pandas_types = None # type: ignore
Expand Down
1 change: 1 addition & 0 deletions tests/integ/modin/groupby/test_groupby_negative.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,7 @@ def test_groupby_agg_invalid_min_count(
)


@sql_count_checker(query_count=0)
def test_groupby_negative_var():
native_df = native_pd.DataFrame(
{
Expand Down
20 changes: 0 additions & 20 deletions tests/integ/modin/types/test_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,23 +88,3 @@ def test_timedelta_precision_insufficient_with_nulls_SNOW_1628925():
eval_snowpark_pandas_result(
pd, native_pd, lambda lib: lib.Series([None, timedelta])
)


@sql_count_checker(query_count=0)
def test_timedelta_not_supported():
df = pd.DataFrame(
{
"a": ["one", "two", "three"],
"b": ["abc", "pqr", "xyz"],
"dt": [
pd.Timedelta("1 days"),
pd.Timedelta("2 days"),
pd.Timedelta("3 days"),
],
}
)
with pytest.raises(
NotImplementedError,
match="validate_groupby is not yet implemented for Timedelta Type",
):
df.groupby("a").count()

0 comments on commit a41dfed

Please sign in to comment.