diff --git a/CHANGELOG.md b/CHANGELOG.md index 1731ad4ce96..8fb5ac38c74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ - Added support for `Index.value_counts`. - Added support for `Series.dt.day_name` and `Series.dt.month_name`. - Added support for indexing on Index, e.g., `df.index[:10]`. +- Added support for `DataFrame.unstack` and `Series.unstack`. #### Improvements - Removed the public preview warning message upon importing Snowpark pandas. diff --git a/docs/source/modin/dataframe.rst b/docs/source/modin/dataframe.rst index ef55db3145c..24cf382cb57 100644 --- a/docs/source/modin/dataframe.rst +++ b/docs/source/modin/dataframe.rst @@ -200,6 +200,7 @@ DataFrame DataFrame.stack DataFrame.T DataFrame.transpose + DataFrame.unstack .. rubric:: Combining / comparing / joining / merging diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst index 12fbdb8b53c..2dfc499fa57 100644 --- a/docs/source/modin/series.rst +++ b/docs/source/modin/series.rst @@ -199,6 +199,7 @@ Series Series.sort_values Series.sort_index + Series.unstack Series.nlargest Series.nsmallest Series.squeeze diff --git a/docs/source/modin/supported/dataframe_supported.rst b/docs/source/modin/supported/dataframe_supported.rst index c5a2169864b..7d45e253a2c 100644 --- a/docs/source/modin/supported/dataframe_supported.rst +++ b/docs/source/modin/supported/dataframe_supported.rst @@ -483,7 +483,7 @@ Methods +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``tz_localize`` | N | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``unstack`` | N | | | +| ``unstack`` | P | ``sort`` | ``N`` for non-integer ``level``. 
| +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``update`` | Y | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst index 85e2e11c40c..e74173606a9 100644 --- a/docs/source/modin/supported/series_supported.rst +++ b/docs/source/modin/supported/series_supported.rst @@ -460,7 +460,7 @@ Methods +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``unique`` | Y | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``unstack`` | N | | | +| ``unstack`` | P | ``sort`` | ``N`` for non-integer ``level``. 
| +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``update`` | Y | | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index 6fc2c4a2568..19725df4fe3 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -1800,23 +1800,33 @@ def nsmallest(self, n, columns, keep="first"): # noqa: PR01, RT01, D200 ) ) - @dataframe_not_implemented() - def unstack(self, level=-1, fill_value=None): # noqa: PR01, RT01, D200 + def unstack( + self, + level: int | str | list = -1, + fill_value: int | str | dict = None, + sort: bool = True, + ): """ Pivot a level of the (necessarily hierarchical) index labels. """ # TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions - if not isinstance(self.index, pandas.MultiIndex) or ( - isinstance(self.index, pandas.MultiIndex) - and is_list_like(level) - and len(level) == self.index.nlevels + # This ensures that non-pandas MultiIndex objects are caught. 
+ nlevels = self._query_compiler.nlevels() + is_multiindex = nlevels > 1 + + if not is_multiindex or ( + is_multiindex and is_list_like(level) and len(level) == nlevels ): return self._reduce_dimension( - query_compiler=self._query_compiler.unstack(level, fill_value) + query_compiler=self._query_compiler.unstack( + level, fill_value, sort, is_series_input=False + ) ) else: return self.__constructor__( - query_compiler=self._query_compiler.unstack(level, fill_value) + query_compiler=self._query_compiler.unstack( + level, fill_value, sort, is_series_input=False + ) ) def pivot( diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index 93bdcdf2801..daa1c51c488 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -1562,19 +1562,31 @@ def set_axis( copy=copy, ) - @series_not_implemented() - def unstack(self, level=-1, fill_value=None): # noqa: PR01, RT01, D200 + def unstack( + self, + level: int | str | list = -1, + fill_value: int | str | dict = None, + sort: bool = True, + ): """ Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. """ # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions from snowflake.snowpark.modin.pandas.dataframe import DataFrame - result = DataFrame( - query_compiler=self._query_compiler.unstack(level, fill_value) - ) + # We can't unstack a Series object, if we don't have a MultiIndex. 
+ if self._query_compiler.has_multiindex: + result = DataFrame( + query_compiler=self._query_compiler.unstack( + level, fill_value, sort, is_series_input=True + ) + ) + else: + raise ValueError( # pragma: no cover + f"index must be a MultiIndex to unstack, {type(self.index)} was passed" + ) - return result.droplevel(0, axis=1) if result.columns.nlevels > 1 else result + return result @series_not_implemented() @property diff --git a/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py index e6bf0486c0e..3c1bd87bdef 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py @@ -4,6 +4,7 @@ import json import typing from collections.abc import Hashable +from enum import Enum from typing import Optional from snowflake.snowpark._internal.analyzer.analyzer_utils import ( @@ -58,6 +59,11 @@ DEFAULT_PANDAS_UNPIVOT_VALUE_NAME = "value" +class StackOperation(Enum): + STACK = "stack" + UNSTACK = "unstack" + + class UnpivotResultInfo(typing.NamedTuple): """ Structure that stores information about the unpivot result. 
diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 79e4cb141f6..332e533e715 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -279,6 +279,7 @@ ) from snowflake.snowpark.modin.plugin._internal.unpivot_utils import ( UNPIVOT_NULL_REPLACE_VALUE, + StackOperation, unpivot, unpivot_empty_df, ) @@ -314,6 +315,7 @@ parse_object_construct_snowflake_quoted_identifier_and_extract_pandas_label, parse_snowflake_object_construct_identifier_to_map, snowpark_to_pandas_helper, + unquote_name_if_quoted, ) from snowflake.snowpark.modin.plugin._internal.where_utils import ( validate_expected_boolean_data_columns, @@ -15863,34 +15865,201 @@ def stack( "Snowpark pandas doesn't support multiindex columns in stack API" ) - index_names = ["index"] - # Stack is equivalent to doing df.melt() with index reset, sorting the values, then setting the index - # Note that we always use sort_rows_by_column_values even if sort is False - qc = ( - self.reset_index() - .melt( - id_vars=index_names, - value_vars=self.columns, - var_name="index_second_level", - value_name=MODIN_UNNAMED_SERIES_LABEL, - ignore_index=False, + qc = self._stack_helper(operation=StackOperation.STACK) + + if dropna: + return qc.dropna(axis=0, how="any", thresh=None) + else: + return qc + + def unstack( + self, + level: Union[int, str, list] = -1, + fill_value: Optional[Union[int, str, dict]] = None, + sort: bool = True, + is_series_input: bool = False, + ) -> "SnowflakeQueryCompiler": + """ + Pivot a level of the (necessarily hierarchical) index labels. + + Returns a DataFrame having a new level of column labels whose + inner-most level consists of the pivoted index labels. 
+ + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). + + Parameters + ---------- + level : int, str, list, default -1 + Level(s) of index to unstack, can pass level name. + + fill_value : int, str, dict, optional + Replace NaN with this value if the unstack produces missing values. + + sort : bool, default True + Sort the level(s) in the resulting MultiIndex columns. + + is_series_input : bool, default False + Whether the input is a Series, in which case we call `droplevel` + """ + if not isinstance(level, int): + # TODO: SNOW-1558364: Support index name passed to level parameter + ErrorMessage.not_implemented( + "Snowpark pandas DataFrame/Series.unstack does not yet support a non-integer `level` parameter" + ) + if not sort: + ErrorMessage.not_implemented( + "Snowpark pandas DataFrame/Series.unstack does not yet support the `sort` parameter" ) - .sort_rows_by_column_values( - columns=index_names, # type: ignore + if self._modin_frame.is_multiindex(axis=1): + ErrorMessage.not_implemented( + "Snowpark pandas doesn't support multiindex columns in the unstack API" + ) + + level = [level] + + index_names = self.get_index_names() + + # Check to see if we have a MultiIndex, if we do, make sure we remove + # the appropriate level(s), and we pivot accordingly. 
+ if len(index_names) > 1: + # Resetting the index keeps the index columns as the first n data columns + qc = self.reset_index() + index_cols = qc._modin_frame.data_column_pandas_labels[0 : len(index_names)] + pivot_cols = [index_cols[lev] for lev in level] # type: ignore + res_index_cols = [] + column_names_to_reset_to_none = [] + for i in range(len(index_names)): + if index_names[i] is None: + # We need to track the names where the index and columns originally had no name + # in order to reset those names back to None after the operation + column_names_to_reset_to_none.append( + qc._modin_frame.data_column_pandas_labels[i] + ) + col = index_cols[i] + if col not in pivot_cols: + res_index_cols.append(col) + vals = [ + c + for c in self.columns + if c not in res_index_cols and c not in pivot_cols + ] + + qc = qc.pivot_table( + columns=pivot_cols, + index=res_index_cols, + values=vals, + aggfunc="min", + fill_value=fill_value, + margins=False, + dropna=True, + margins_name="All", + observed=False, + sort=sort, + ) + + # Set the original unnamed index values back to None + output_index_names = qc.get_index_names() + output_index_names_replace_level_with_none = [ + None + if output_index_names[i] in column_names_to_reset_to_none + else output_index_names[i] + for i in range(len(output_index_names)) + ] + qc = qc.set_index_names(output_index_names_replace_level_with_none) + # Set the unnamed column values back to None + output_column_names = qc.columns.names + output_column_names_replace_level_with_none = [ + None + if output_column_names[i] in column_names_to_reset_to_none + else output_column_names[i] + for i in range(len(output_column_names)) + ] + qc = qc.set_columns( + qc.columns.set_names(output_column_names_replace_level_with_none) + ) + else: + qc = self._stack_helper(operation=StackOperation.UNSTACK) + + if is_series_input and qc.columns.nlevels > 1: + # If input is Series and output is MultiIndex, drop the top level of the MultiIndex + qc = 
qc.set_columns(qc.columns.droplevel()) + return qc + + def _stack_helper( + self, + operation: StackOperation, + ) -> "SnowflakeQueryCompiler": + """ + Helper function that performs stacking or unstacking operation on single index dataframe/series. + + Parameters + ---------- + operation : StackOperation.STACK or StackOperation.UNSTACK + The operation being performed. + """ + index_names = self.get_index_names() + # Resetting the index keeps the index columns as the first n data columns + qc = self.reset_index() + index_cols = qc._modin_frame.data_column_pandas_labels[0 : len(index_names)] + column_names_to_reset_to_none = [] + for i in range(len(index_names)): + if index_names[i] is None: + # We need to track the names where the index and columns originally had no name + # in order to reset those names back to None after the operation + column_names_to_reset_to_none.append( + qc._modin_frame.data_column_pandas_labels[i] + ) + + # Track the new column name for the original unnamed column + if self.columns.name is None: + quoted_col_label = ( + qc._modin_frame.ordered_dataframe.generate_snowflake_quoted_identifiers( + pandas_labels=["index_second_level"] + )[0] + ) + col_label = unquote_name_if_quoted(quoted_col_label) + column_names_to_reset_to_none.append(col_label) + else: + col_label = self.columns.name + + qc = qc.melt( + id_vars=index_cols, # type: ignore + value_vars=self.columns, + var_name=col_label, + value_name=MODIN_UNNAMED_SERIES_LABEL, + ignore_index=False, + ) + + if operation == StackOperation.STACK: + # Only sort rows by column values in case of 'stack' + # For 'unstack' maintain the row position order + qc = qc.sort_rows_by_column_values( + columns=index_cols, # type: ignore ascending=[True], kind="stable", na_position="last", ignore_index=False, ) - .replace(to_replace=UNPIVOT_NULL_REPLACE_VALUE, value=np.nan) - .set_index_from_columns(index_names + ["index_second_level"]) # type: ignore - .set_index_names([None, None]) - ) - if dropna: - return 
qc.dropna(axis=0, how="any", thresh=None) + # TODO: SNOW-1524695: Remove the following replace once "NULL_REPLACE" values are fixed for 'melt' + qc = qc.replace(to_replace=UNPIVOT_NULL_REPLACE_VALUE, value=np.nan) + + if operation == StackOperation.STACK: + qc = qc.set_index_from_columns(index_cols + [col_label]) # type: ignore else: - return qc + qc = qc.set_index_from_columns([col_label] + index_cols) # type: ignore + + # Set the original unnamed index and column values back to None + output_index_names = qc.get_index_names() + output_index_names = [ + None + if output_index_names[i] in column_names_to_reset_to_none + else output_index_names[i] + for i in range(len(output_index_names)) + ] + qc = qc.set_index_names(output_index_names) + return qc def corr( self, diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py index 4c6ccbcc648..ebc9c766d35 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py @@ -2542,6 +2542,66 @@ def nsmallest(): def unstack(): """ Pivot a level of the (necessarily hierarchical) index labels. + + Returns a DataFrame having a new level of column labels whose + inner-most level consists of the pivoted index labels. + + If the index is not a MultiIndex, the output will be a Series + (the analogue of stack when the columns are not a MultiIndex). + + Parameters + ---------- + level : int, str, list, default -1 + Level(s) of index to unstack, can pass level name. + + fill_value : int, str, dict, optional + Replace NaN with this value if the unstack produces missing values. + + sort : bool, default True + Sort the level(s) in the resulting MultiIndex columns. + + Returns + ------- + Series or DataFrame + + Notes + ----- + Supports only integer ``level`` and ``sort = True``. Internally, calls ``pivot_table`` + or ``melt`` to perform ``unstack`` operation. 
+ + See Also + -------- + DataFrame.pivot : Pivot without aggregation that can handle + non-numeric data. + DataFrame.stack : Pivot a level of the column labels (inverse + operation from unstack). + + Examples + -------- + >>> index = pd.MultiIndex.from_tuples([('one', 'a'), ('one', 'b'), + ... ('two', 'a'), ('two', 'b')]) + >>> s = pd.Series(np.arange(1.0, 5.0), index=index) + >>> s + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 + >>> s.unstack(level=-1) + a b + one 1.0 2.0 + two 3.0 4.0 + >>> s.unstack(level=0) + one two + a 1.0 3.0 + b 2.0 4.0 + >>> df = s.unstack(level=0) + >>> df.unstack() + one a 1.0 + b 2.0 + two a 3.0 + b 4.0 + dtype: float64 """ def pad(): diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 48e06f402a2..33624ba05fd 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -2164,6 +2164,46 @@ def set_axis(): def unstack(): """ Unstack, also known as pivot, Series with MultiIndex to produce DataFrame. + + Parameters + ---------- + level : int, str, list, default -1 + Level(s) of index to unstack, can pass level name. + + fill_value : int, str, dict, optional + Replace NaN with this value if the unstack produces missing values. + + sort : bool, default True + Sort the level(s) in the resulting MultiIndex columns. + + Returns + ------- + Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame` + + Notes + ----- + Supports only integer ``level`` and ``sort = True``. Internally, calls ``pivot_table`` + or ``melt`` to perform ``unstack`` operation. + + Examples + -------- + >>> s = pd.Series([1, 2, 3, 4], + ... index=pd.MultiIndex.from_product([['one', 'two'], + ... 
['a', 'b']])) + >>> s + one a 1 + b 2 + two a 3 + b 4 + dtype: int64 + >>> s.unstack(level=-1) + a b + one 1 2 + two 3 4 + >>> s.unstack(level=0) + one two + a 1 3 + b 2 4 """ @property diff --git a/tests/integ/modin/frame/test_stack.py b/tests/integ/modin/frame/test_stack.py index 81743028faa..9b06c32ff0f 100644 --- a/tests/integ/modin/frame/test_stack.py +++ b/tests/integ/modin/frame/test_stack.py @@ -4,6 +4,7 @@ import modin.pandas as pd import numpy as np +import pandas as native_pd import pytest from tests.integ.modin.sql_counter import sql_count_checker @@ -27,6 +28,22 @@ def test_stack(data, index, columns, dropna, sort): ) +@pytest.mark.parametrize("dropna", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +@sql_count_checker(query_count=1) +def test_stack_with_index_name(dropna, sort): + index = native_pd.Index(data=["cat", "dog"], name="hello") + native_df = native_pd.DataFrame( + data=[[0, 1], [2, 3]], index=index, columns=["weight", "height"] + ) + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: df.stack(dropna=dropna, sort=sort), + ) + + @sql_count_checker(query_count=0) def test_stack_level_unsupported(): df_single_level_cols = pd.DataFrame( diff --git a/tests/integ/modin/frame/test_unstack.py b/tests/integ/modin/frame/test_unstack.py new file mode 100644 index 00000000000..f6f0e00df5f --- /dev/null +++ b/tests/integ/modin/frame/test_unstack.py @@ -0,0 +1,89 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +from tests.integ.modin.sql_counter import sql_count_checker +from tests.integ.modin.utils import eval_snowpark_pandas_result + + +@pytest.mark.parametrize( + "index_names", + [ + [None, None], + ["hello", "world"], + [None, "world"], + ["hello", None], + ], +) +@sql_count_checker(query_count=1) +def test_unstack_input_no_multiindex(index_names): + index = native_pd.MultiIndex.from_tuples( + tuples=[("one", "a"), ("one", "b"), ("two", "a"), ("two", "b")], + names=index_names, + ) + # Note we call unstack below to create a dataframe without a multiindex before + # calling unstack again + native_df = native_pd.Series(np.arange(1.0, 5.0), index=index).unstack(level=0) + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.unstack()) + + +@pytest.mark.parametrize( + "index_names", + [ + [None, None], + ["hello", "world"], + ["hello", None], + [None, "world"], + ], +) +@sql_count_checker(query_count=1) +def test_unstack_multiindex(index_names): + index = pd.MultiIndex.from_product( + iterables=[[2, 1], ["a", "b"]], names=index_names + ) + native_df = native_pd.DataFrame(np.random.randn(4), index=index, columns=["A"]) + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.unstack()) + + +@sql_count_checker(query_count=1, join_count=1) +def test_unstack_multiple_columns(): + index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]]) + native_df = native_pd.DataFrame( + {"A": [1, 2, 3, 4], "B": ["f", "o", "u", "r"]}, index=index + ) + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.unstack()) + + +@sql_count_checker(query_count=0) +def test_unstack_sort_notimplemented(): + index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]]) + native_df = native_pd.DataFrame(np.random.randn(4), index=index, columns=["A"]) + snow_df = 
pd.DataFrame(native_df) + + with pytest.raises( + NotImplementedError, + match="Snowpark pandas DataFrame/Series.unstack does not yet support the `sort` parameter", + ): + snow_df.unstack(sort=False) + + +@sql_count_checker(query_count=0) +def test_unstack_non_integer_level_notimplemented(): + # Expected to issue no queries (see query_count=0 above), even though the frontend checks the number of index levels first + index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]]) + native_df = native_pd.DataFrame(np.random.randn(4), index=index, columns=["A"]) + snow_df = pd.DataFrame(native_df) + + with pytest.raises( + NotImplementedError, + match="Snowpark pandas DataFrame/Series.unstack does not yet support a non-integer `level` parameter", + ): + snow_df.unstack(level=[0, 1]) diff --git a/tests/integ/modin/series/test_unstack.py b/tests/integ/modin/series/test_unstack.py new file mode 100644 index 00000000000..9eab9e6df1b --- /dev/null +++ b/tests/integ/modin/series/test_unstack.py @@ -0,0 +1,67 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +from tests.integ.modin.sql_counter import sql_count_checker +from tests.integ.modin.utils import eval_snowpark_pandas_result + + +@pytest.mark.parametrize("level", [-1, 0]) +@pytest.mark.parametrize( + "index_names", + [ + [None, None], + ["hello", "world"], + ["hello", None], + [None, "world"], + ], +) +@sql_count_checker(query_count=1) +def test_unstack_multiindex(level, index_names): + index = native_pd.MultiIndex.from_tuples( + tuples=[("one", "a"), ("one", "b"), ("two", "a"), ("two", "b")], + names=index_names, + ) + native_ser = native_pd.Series(np.arange(1.0, 5.0), index=index) + snow_ser = pd.Series(native_ser) + + eval_snowpark_pandas_result( + snow_ser, + native_ser, + lambda ser: ser.unstack(level=level), + ) + + +@sql_count_checker(query_count=0) +def test_unstack_sort_notimplemented(): + index = native_pd.MultiIndex.from_tuples( + [("one", "a"), ("one", "b"), ("two", "a"), ("two", "b")] + ) + native_ser = native_pd.Series(np.arange(1.0, 5.0), index=index) + snow_ser = pd.Series(native_ser) + + with pytest.raises( + NotImplementedError, + match="Snowpark pandas DataFrame/Series.unstack does not yet support the `sort` parameter", + ): + snow_ser.unstack(sort=False) + + +@sql_count_checker(query_count=0) +def test_unstack_non_integer_level_notimplemented(): + index = native_pd.MultiIndex.from_tuples( + [("one", "a"), ("one", "b"), ("two", "a"), ("two", "b")] + ) + native_ser = native_pd.Series(np.arange(1.0, 5.0), index=index) + snow_ser = pd.Series(native_ser) + + with pytest.raises( + NotImplementedError, + match="Snowpark pandas DataFrame/Series.unstack does not yet support a non-integer `level` parameter", + ): + snow_ser.unstack(level=[0, 1]) diff --git a/tests/unit/modin/test_unsupported.py b/tests/unit/modin/test_unsupported.py index e63263a77be..e2a78f6b377 100644 --- a/tests/unit/modin/test_unsupported.py +++ b/tests/unit/modin/test_unsupported.py @@ 
-123,7 +123,6 @@ def test_unsupported_general(general_method, kwargs): ["truncate", {}], ["tz_convert", {"tz": ""}], ["tz_localize", {"tz": ""}], - ["unstack", {}], ["xs", {"key": ""}], ["__dataframe__", {}], ], @@ -215,7 +214,6 @@ def test_unsupported_df_generator(df_method, kwargs): ["truncate", {}], ["tz_convert", {"tz": ""}], ["tz_localize", {"tz": ""}], - ["unstack", {}], ["view", {}], ["xs", {"key": ""}], ],