Skip to content

Commit

Permalink
SNOW-1625377 Raise NotImplementedError for timedelta (#2102)
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-azhan authored Aug 17, 2024
1 parent 92a7ed4 commit 8862b80
Show file tree
Hide file tree
Showing 28 changed files with 663 additions and 22 deletions.
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,10 @@
`is_month_start`, `is_month_end`, `is_quarter_start`, `is_quarter_end`, `is_year_start`, `is_year_end`
and `is_leap_year`.
- Added support for `Resampler.fillna` and `Resampler.bfill`.
- Added limited support for the `Timedelta` type, including creating `Timedelta` columns and `to_pandas`.
- Added limited support for the `Timedelta` type, including
- support for creating `Timedelta` columns and `to_pandas`.
- support `copy`, `cache_result`, `shift`, `sort_index`.
- `NotImplementedError` will be raised for the rest of methods that do not support `Timedelta`.
- Added support for `Index.argmax` and `Index.argmin`.
- Added support for index's arithmetic and comparison operators.
- Added support for `Series.dt.round`.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -652,6 +652,12 @@ def drop_non_numeric_data_columns(
col.snowflake_quoted_identifier for col in data_column_to_retain
]

new_data_column_types = [
type
for id, type in original_frame.snowflake_quoted_identifier_to_snowpark_pandas_type.items()
if id in new_data_column_snowflake_quoted_identifiers
]

return SnowflakeQueryCompiler(
InternalFrame.create(
ordered_dataframe=original_frame.ordered_dataframe,
Expand All @@ -660,6 +666,8 @@ def drop_non_numeric_data_columns(
data_column_pandas_index_names=original_frame.data_column_pandas_index_names,
index_column_pandas_labels=original_frame.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=original_frame.index_column_snowflake_quoted_identifiers,
data_column_types=new_data_column_types,
index_column_types=original_frame.cached_index_column_snowpark_pandas_types,
)
)

Expand Down
2 changes: 2 additions & 0 deletions src/snowflake/snowpark/modin/plugin/_internal/apply_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1257,6 +1257,8 @@ def groupby_apply_create_internal_frame_from_final_ordered_dataframe(
+ func_result_index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=group_quoted_identifiers
+ func_result_index_column_snowflake_quoted_identifiers,
data_column_types=None,
index_column_types=None,
)


Expand Down
12 changes: 12 additions & 0 deletions src/snowflake/snowpark/modin/plugin/_internal/concat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ def convert_to_single_level_index(frame: InternalFrame, axis: int) -> InternalFr
data_column_pandas_index_names=[None],
index_column_pandas_labels=frame.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers,
data_column_types=frame.cached_data_column_snowpark_pandas_types,
index_column_types=frame.cached_index_column_snowpark_pandas_types,
)
else:
WarningMessage.tuples_stored_as_array(
Expand All @@ -122,6 +124,8 @@ def convert_to_single_level_index(frame: InternalFrame, axis: int) -> InternalFr
data_column_pandas_labels=frame.data_column_pandas_labels,
data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers,
data_column_pandas_index_names=frame.data_column_pandas_index_names,
data_column_types=None,
index_column_types=None,
)


Expand Down Expand Up @@ -224,6 +228,8 @@ def union_all(
data_column_pandas_index_names=frame1.data_column_pandas_index_names,
index_column_pandas_labels=frame1.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=frame1.index_column_snowflake_quoted_identifiers,
data_column_types=None,
index_column_types=None,
)


Expand Down Expand Up @@ -262,6 +268,8 @@ def add_key_as_index_columns(frame: InternalFrame, key: Hashable) -> InternalFra
data_column_pandas_index_names=frame.data_column_pandas_index_names,
index_column_pandas_labels=index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=index_column_snowflake_quoted_identifiers,
data_column_types=None,
index_column_types=None,
)


Expand Down Expand Up @@ -322,6 +330,8 @@ def _select_columns(
data_column_pandas_index_names=frame.data_column_pandas_index_names,
index_column_pandas_labels=frame.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers,
data_column_types=None,
index_column_types=None,
)


Expand Down Expand Up @@ -360,4 +370,6 @@ def add_global_ordering_columns(frame: InternalFrame, position: int) -> Internal
data_column_pandas_index_names=frame.data_column_pandas_index_names,
index_column_pandas_labels=frame.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers,
data_column_types=frame.cached_data_column_snowpark_pandas_types,
index_column_types=frame.cached_index_column_snowpark_pandas_types,
)
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ def get_groupby_cumagg_frame_axis0(
index_column_pandas_labels=result_frame.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=result_frame.index_column_snowflake_quoted_identifiers,
data_column_pandas_index_names=[None],
data_column_types=result_frame.cached_data_column_snowpark_pandas_types,
index_column_types=result_frame.cached_index_column_snowpark_pandas_types,
)
else:
return result_frame
2 changes: 2 additions & 0 deletions src/snowflake/snowpark/modin/plugin/_internal/cut_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ def compute_bin_indices(
data_column_snowflake_quoted_identifiers=[new_data_identifier],
index_column_pandas_labels=value_frame.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=value_index_identifiers,
data_column_types=None,
index_column_types=None,
)

return new_frame
66 changes: 55 additions & 11 deletions src/snowflake/snowpark/modin/plugin/_internal/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,19 @@ def _create_snowflake_quoted_identifier_to_snowpark_pandas_type(
dict mapping each column's Snowflake quoted identifier to the column's Snowpark pandas type.
"""
if data_column_types is not None:
assert len(data_column_types) == len(data_column_snowflake_quoted_identifiers)
assert len(data_column_types) == len(
data_column_snowflake_quoted_identifiers
), (
f"The length of data_column_types {data_column_types} is different from the length of "
f"data_column_snowflake_quoted_identifiers {data_column_snowflake_quoted_identifiers}"
)
if index_column_types is not None:
assert len(index_column_types) == len(index_column_snowflake_quoted_identifiers)
assert len(index_column_types) == len(
index_column_snowflake_quoted_identifiers
), (
f"The length of index_column_types {index_column_types} is different from the length of "
f"index_column_snowflake_quoted_identifiers {index_column_snowflake_quoted_identifiers}"
)

return MappingProxyType(
{
Expand Down Expand Up @@ -164,8 +174,8 @@ def create(
data_column_snowflake_quoted_identifiers: list[str],
index_column_pandas_labels: list[Hashable],
index_column_snowflake_quoted_identifiers: list[str],
data_column_types: Optional[list[Optional[SnowparkPandasType]]] = None,
index_column_types: Optional[list[Optional[SnowparkPandasType]]] = None,
data_column_types: Optional[list[Optional[SnowparkPandasType]]],
index_column_types: Optional[list[Optional[SnowparkPandasType]]],
) -> "InternalFrame":
"""
Args:
Expand Down Expand Up @@ -630,7 +640,14 @@ def get_snowflake_identifiers_for_levels(self, levels: list[int]) -> list[str]:

def get_snowflake_identifiers_and_pandas_labels_from_levels(
self, levels: list[int]
) -> tuple[list[Hashable], list[str], list[Hashable], list[str]]:
) -> tuple[
list[Hashable],
list[str],
list[Optional[SnowparkPandasType]],
list[Hashable],
list[str],
list[Optional[SnowparkPandasType]],
]:
"""
Selects snowflake identifiers and pandas labels from index columns in `levels`.
Also returns snowflake identifiers and pandas labels not in `levels`.
Expand All @@ -639,36 +656,45 @@ def get_snowflake_identifiers_and_pandas_labels_from_levels(
levels: A list of integers represents levels in pandas Index.
Returns:
A tuple contains 4 lists:
A tuple contains 6 lists:
1. The first list contains snowflake identifiers of index columns in `levels`.
2. The second list contains pandas labels of index columns in `levels`.
3. The third list contains snowflake identifiers of index columns not in `levels`.
4. The fourth list contains pandas labels of index columns not in `levels`.
3. The third list contains Snowpark pandas types of index columns in `levels`.
4. The fourth list contains snowflake identifiers of index columns not in `levels`.
5. The fifth list contains pandas labels of index columns not in `levels`.
6. The sixth list contains Snowpark pandas types of index columns not in `levels`.
"""
index_column_pandas_labels_in_levels = []
index_column_snowflake_quoted_identifiers_in_levels = []
index_column_types_in_levels = []
index_column_pandas_labels_not_in_levels = []
index_column_snowflake_quoted_identifiers_not_in_levels = []
for idx, (identifier, label) in enumerate(
index_column_types_not_in_levels = []
for idx, (identifier, label, type) in enumerate(
zip(
self.index_column_snowflake_quoted_identifiers,
self.index_column_pandas_labels,
self.cached_index_column_snowpark_pandas_types,
)
):
if idx in levels:
index_column_pandas_labels_in_levels.append(label)
index_column_snowflake_quoted_identifiers_in_levels.append(identifier)
index_column_types_in_levels.append(type)
else:
index_column_pandas_labels_not_in_levels.append(label)
index_column_snowflake_quoted_identifiers_not_in_levels.append(
identifier
)
index_column_types_not_in_levels.append(type)

return (
index_column_pandas_labels_in_levels,
index_column_snowflake_quoted_identifiers_in_levels,
index_column_types_in_levels,
index_column_pandas_labels_not_in_levels,
index_column_snowflake_quoted_identifiers_not_in_levels,
index_column_types_not_in_levels,
)

@functools.cached_property
Expand Down Expand Up @@ -855,8 +881,10 @@ def ensure_row_position_column(self) -> "InternalFrame":
data_column_pandas_labels=self.data_column_pandas_labels,
data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
data_column_pandas_index_names=self.data_column_pandas_index_names,
data_column_types=self.cached_data_column_snowpark_pandas_types,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

def ensure_row_count_column(self) -> "InternalFrame":
Expand All @@ -873,6 +901,8 @@ def ensure_row_count_column(self) -> "InternalFrame":
data_column_pandas_index_names=self.data_column_pandas_index_names,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
data_column_types=self.cached_data_column_snowpark_pandas_types,
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

def persist_to_temporary_table(self) -> "InternalFrame":
Expand All @@ -886,9 +916,11 @@ def persist_to_temporary_table(self) -> "InternalFrame":
ordered_dataframe=cache_result(self.ordered_dataframe),
data_column_pandas_labels=self.data_column_pandas_labels,
data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
data_column_types=self.cached_data_column_snowpark_pandas_types,
data_column_pandas_index_names=self.data_column_pandas_index_names,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

def append_column(
Expand Down Expand Up @@ -943,6 +975,8 @@ def append_column(
data_column_pandas_index_names=self.data_column_pandas_index_names,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
data_column_types=self.cached_data_column_snowpark_pandas_types + [None],
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

def project_columns(
Expand Down Expand Up @@ -981,6 +1015,8 @@ def project_columns(
data_column_pandas_index_names=self.data_column_pandas_index_names,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
data_column_types=None,
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

def rename_snowflake_identifiers(
Expand Down Expand Up @@ -1062,11 +1098,11 @@ def get_updated_identifiers(identifiers: list[str]) -> list[str]:
self.data_column_snowflake_quoted_identifiers
),
data_column_pandas_index_names=self.data_column_pandas_index_names,
data_column_types=self.cached_data_column_snowpark_pandas_types,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=get_updated_identifiers(
self.index_column_snowflake_quoted_identifiers
),
data_column_types=self.cached_data_column_snowpark_pandas_types,
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

Expand All @@ -1080,7 +1116,7 @@ def update_snowflake_quoted_identifiers_with_expressions(
This function takes a mapping from existing snowflake quoted identifiers to
new Snowpark column expressions and points the existing quoted identifiers to the
column expressions provided by the mapping. For optimization purposes,
existing expressions are kept as columns. This does not change pandas labels.
existing expressions are kept as columns. This does not change pandas labels and cached Snwopark pandas types.
The process involves the following steps:
Expand Down Expand Up @@ -1170,8 +1206,10 @@ def update_snowflake_quoted_identifiers_with_expressions(
data_column_pandas_labels=self.data_column_pandas_labels,
data_column_snowflake_quoted_identifiers=new_data_column_snowflake_quoted_identifiers,
data_column_pandas_index_names=self.data_column_pandas_index_names,
data_column_types=self.cached_data_column_snowpark_pandas_types,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=new_index_column_snowflake_quoted_identifiers,
index_column_types=self.cached_index_column_snowpark_pandas_types,
),
existing_id_to_new_id_mapping,
)
Expand Down Expand Up @@ -1239,6 +1277,8 @@ def select_active_columns(self) -> "InternalFrame":
data_column_pandas_labels=self.data_column_pandas_labels,
data_column_snowflake_quoted_identifiers=self.data_column_snowflake_quoted_identifiers,
data_column_pandas_index_names=self.data_column_pandas_index_names,
data_column_types=self.cached_data_column_snowpark_pandas_types,
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

def strip_duplicates(
Expand Down Expand Up @@ -1305,6 +1345,8 @@ def strip_duplicates(
data_column_pandas_index_names=frame.data_column_pandas_index_names,
index_column_pandas_labels=frame.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers,
data_column_types=frame.cached_data_column_snowpark_pandas_types,
index_column_types=frame.cached_index_column_snowpark_pandas_types,
)

def filter(
Expand All @@ -1325,6 +1367,8 @@ def filter(
data_column_pandas_index_names=self.data_column_pandas_index_names,
index_column_pandas_labels=self.index_column_pandas_labels,
index_column_snowflake_quoted_identifiers=self.index_column_snowflake_quoted_identifiers,
data_column_types=self.cached_data_column_snowpark_pandas_types,
index_column_types=self.cached_index_column_snowpark_pandas_types,
)

def normalize_snowflake_quoted_identifiers_with_pandas_label(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ def _create_qc_from_snowpark_dataframe(
index_column_snowflake_quoted_identifiers=[
odf.row_position_snowflake_quoted_identifier
],
data_column_types=None,
index_column_types=None,
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,8 @@ def get_frame_with_groupby_columns_as_index(
data_column_pandas_labels=internal_frame.data_column_pandas_labels,
data_column_snowflake_quoted_identifiers=internal_frame.data_column_snowflake_quoted_identifiers,
data_column_pandas_index_names=internal_frame.data_column_pandas_index_names,
data_column_types=internal_frame.cached_data_column_snowpark_pandas_types,
index_column_types=internal_frame.cached_index_column_snowpark_pandas_types,
)
)

Expand Down
Loading

0 comments on commit 8862b80

Please sign in to comment.