diff --git a/CHANGELOG.md b/CHANGELOG.md index 95364a72e8a..1049c88c29c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -83,6 +83,11 @@ - Stopped ignoring nanoseconds in `pd.Timedelta` scalars. - Fixed AssertionError in tree of binary operations. +#### Behavior Change + +- When calling `DataFrame.set_index`, or setting `DataFrame.index` or `Series.index`, with a new index that does not match the current length of the `Series`/`DataFrame` object, a `ValueError` is no longer raised. When the `Series`/`DataFrame` object is longer than the new index, the `Series`/`DataFrame`'s new index is filled with `NaN` values for the "extra" elements. When the `Series`/`DataFrame` object is shorter than the new index, the extra values in the new index are ignored—`Series` and `DataFrame` stay the same length `n`, and use only the first `n` values of the new index. + + ## 1.21.0 (2024-08-19) ### Snowpark Python API Updates @@ -186,10 +191,6 @@ - Fixed a bug in `Series.reset_index(drop=True)` where the result name may be wrong. - Fixed a bug in `Groupby.first/last` ordering by the correct columns in the underlying window expression. -#### Behavior Change - -- When calling `DataFrame.set_index`, or setting `DataFrame.index` or `Series.index`, with a new index that does match the current length of the `Series`/`DataFrame` object does not match with the new index's length, a `ValueError` is no longer raised. When the `Series`/`DataFrame` object is longer than the new index, the `Series`/`DataFrame`'s new index is filled with `NaN` values for the "extra" elements. When the `Series`/`DataFrame` object is shorter than the new index, the extra values in the new index are ignored—`Series` and `DataFrame` stay the same length `n`, and use only the first `n` values of the new index. 
- ## 1.20.0 (2024-07-17) ### Snowpark Python API Updates diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index 10326088d4b..26071049237 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -604,27 +604,6 @@ def _to_series_list(self, index: pd.Index) -> list[pd.Series]: return [pd.Series(index)] def _set_index(self, new_index: Axes) -> None: - """ - Set the index for this `Series`/`DataFrame`. - - Parameters - ---------- - new_index : pandas.Index - The new index to set this. - - Note - ---- - When setting `DataFrame.index` or `Series.index` where the length of the - `Series`/`DataFrame` object does not match with the new index's length, - pandas raises a ValueError. Snowpark pandas does not raise this error; - this operation is valid. - When the `Series`/`DataFrame` object is longer than the new index, - the `Series`/`DataFrame`'s new index is filled with `NaN` values for - the "extra" elements. When the `Series`/`DataFrame` object is shorter than - the new index, the extra values in the new index are ignored—`Series` and - `DataFrame` stay the same length `n`, and use only the first `n` values of - the new index. - """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset self._update_inplace( new_query_compiler=self._query_compiler.set_index( @@ -668,27 +647,6 @@ def set_axis( return obj def _get_index(self): - """ - Get the index for this Series/DataFrame. - - Returns - ------- - pandas.Index - The union of all indexes across the partitions. - - Note - ---- - When setting `DataFrame.index` or `Series.index` where the length of the - `Series`/`DataFrame` object does not match with the new index's length, - pandas raises a ValueError. Snowpark pandas does not raise this error; - this operation is valid. 
- When the `Series`/`DataFrame` object is longer than the new index, - the `Series`/`DataFrame`'s new index is filled with `NaN` values for - the "extra" elements. When the `Series`/`DataFrame` object is shorter than - the new index, the extra values in the new index are ignored—`Series` and - `DataFrame` stay the same length `n`, and use only the first `n` values of - the new index. - """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset from snowflake.snowpark.modin.plugin.extensions.index import Index diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py index a6a0aff1af4..3ba4f2f2dab 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py @@ -386,6 +386,25 @@ Series([], dtype: bool) """ +_get_set_index_doc = """ +{desc} + +{parameters_or_returns} + +Note +---- +When setting `DataFrame.index` or `Series.index` where the length of the +`Series`/`DataFrame` object does not match the new index's length, +pandas raises a `ValueError`. Snowpark pandas does not raise this error; +this operation is valid. +When the `Series`/`DataFrame` object is longer than the new index, +the `Series`/`DataFrame`'s new index is filled with `NaN` values for +the "extra" elements. When the `Series`/`DataFrame` object is shorter than +the new index, the extra values in the new index are ignored—`Series` and +`DataFrame` stay the same length `n`, and use only the first `n` values of +the new index. +""" + class BasePandasDataset: """ @@ -3594,3 +3613,21 @@ def __array_function__(): BasePandasDataset The result of the ufunc applied to the `BasePandasDataset`. 
""" + + @doc( + _get_set_index_doc, + desc="Get the index for this `Series`/`DataFrame`.", + parameters_or_returns="Returns\n-------\nIndex\n The index for this `Series`/`DataFrame`.", + ) + def _get_index(): + pass + + @doc( + _get_set_index_doc, + desc="Set the index for this `Series`/`DataFrame`.", + parameters_or_returns="Parameters\n----------\nnew_index : Index\n The new index to set.", + ) + def _set_index(): + pass + + index = property(_get_index, _set_index)