diff --git a/CHANGELOG.md b/CHANGELOG.md index 1bb2cd5c94d..005aaa3a8dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -78,6 +78,8 @@ - Added support for `DatetimeIndex.round`, `DatetimeIndex.floor` and `DatetimeIndex.ceil`. - Added support for `Series.dt.days_in_month` and `Series.dt.daysinmonth`. - Added support for `DataFrameGroupBy.value_counts` and `SeriesGroupBy.value_counts`. +- Added support for `Series.is_monotonic_increasing` and `Series.is_monotonic_decreasing`. +- Added support for `Index.is_monotonic_increasing` and `Index.is_monotonic_decreasing`. - Added support for `pd.crosstab`. #### Improvements diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst index 507d6663f32..fbd936db2f9 100644 --- a/docs/source/modin/series.rst +++ b/docs/source/modin/series.rst @@ -26,6 +26,8 @@ Series Series.equals Series.empty Series.hasnans + Series.is_monotonic_increasing + Series.is_monotonic_decreasing Series.name Series.ndim Series.shape diff --git a/docs/source/modin/supported/index_supported.rst b/docs/source/modin/supported/index_supported.rst index 9db80686454..0c413c201fb 100644 --- a/docs/source/modin/supported/index_supported.rst +++ b/docs/source/modin/supported/index_supported.rst @@ -20,9 +20,9 @@ Attributes +-----------------------------+---------------------------------+----------------------------------------------------+ | ``values`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_increasing`` | N | | +| ``is_monotonic_increasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_decreasing`` | N | | +| ``is_monotonic_decreasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``is_unique`` | Y | | 
+-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst index 331be4d0298..618b88d5034 100644 --- a/docs/source/modin/supported/series_supported.rst +++ b/docs/source/modin/supported/series_supported.rst @@ -43,9 +43,9 @@ Attributes +-----------------------------+---------------------------------+----------------------------------------------------+ | ``index`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_decreasing`` | N | | +| ``is_monotonic_decreasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_increasing`` | N | | +| ``is_monotonic_increasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``is_unique`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index c99a55a596d..848c5e438b3 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -2286,9 +2286,82 @@ def reindex( else: return self._reindex_axis_1(labels=labels, **kwargs) + def is_monotonic_decreasing(self) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + decreasing values in the first data column of this QueryCompiler. 
+ + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically decreasing. + """ + return self._check_monotonic(increasing=False) + + def is_monotonic_increasing(self) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + increasing values in the first data column of this QueryCompiler. + + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically increasing. + """ + return self._check_monotonic(increasing=True) + + def _check_monotonic(self, increasing: bool) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + decreasing or increasing values (depending on `increasing`) in the first data column of this QueryCompiler. + + Parameters + ---------- + increasing: bool + Whether to check for monotonically increasing or decreasing values. + + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically decreasing/increasing. 
+ """ + col_to_check = self._modin_frame.data_column_snowflake_quoted_identifiers[0] + ( + new_qc, + monotonic_increasing_snowflake_quoted_identifier, + monotonic_decreasing_snowflake_quoted_identifier, + ) = self._add_columns_for_monotonicity_checks( + col_to_check=col_to_check, + columns_to_add="increasing" if increasing else "decreasing", + ) + data_column_snowflake_quoted_identifiers = [] + if increasing: + data_column_snowflake_quoted_identifiers.append( + monotonic_increasing_snowflake_quoted_identifier + ) + else: + data_column_snowflake_quoted_identifiers.append( + monotonic_decreasing_snowflake_quoted_identifier + ) + new_modin_frame = new_qc._modin_frame + return SnowflakeQueryCompiler( + InternalFrame.create( + ordered_dataframe=new_modin_frame.ordered_dataframe.limit( + n=1, sort=False + ), + data_column_pandas_index_names=new_modin_frame.data_column_pandas_index_names, + data_column_pandas_labels=["monotonic_column"], + data_column_snowflake_quoted_identifiers=data_column_snowflake_quoted_identifiers, + index_column_pandas_labels=new_modin_frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=new_modin_frame.index_column_snowflake_quoted_identifiers, + data_column_types=None, + index_column_types=None, + ) + ) + def _add_columns_for_monotonicity_checks( - self, col_to_check: str - ) -> tuple["SnowflakeQueryCompiler", str, str]: + self, col_to_check: str, columns_to_add: Optional[str] = None + ) -> tuple["SnowflakeQueryCompiler", Optional[str], Optional[str]]: """ Adds columns that check for monotonicity (increasing or decreasing) in the specified column. @@ -2297,6 +2370,8 @@ def _add_columns_for_monotonicity_checks( ---------- col_to_check : str The Snowflake quoted identifier for the column whose monotonicity to check. + columns_to_add : str, optional + Whether or not to add all columns, and if not, which columns to add. 
Returns ------- @@ -2307,9 +2382,16 @@ def _add_columns_for_monotonicity_checks( """ self._raise_not_implemented_error_for_timedelta() + assert columns_to_add in [ + None, + "increasing", + "decreasing", + ], "Invalid value passed to function" modin_frame = self._modin_frame modin_frame = modin_frame.ensure_row_position_column() row_position_column = modin_frame.row_position_snowflake_quoted_identifier + monotonic_decreasing_snowflake_quoted_id = None + monotonic_increasing_snowflake_quoted_id = None modin_frame = modin_frame.append_column( "_index_lag_col", lag(col_to_check).over(Window.order_by(row_position_column)), @@ -2317,26 +2399,28 @@ def _add_columns_for_monotonicity_checks( lag_col_snowflake_quoted_id = ( modin_frame.data_column_snowflake_quoted_identifiers[-1] ) - modin_frame = modin_frame.append_column( - "_is_monotonic_decreasing", - coalesce( - min_(col(col_to_check) < col(lag_col_snowflake_quoted_id)).over(), - pandas_lit(False), - ), - ) - monotonic_decreasing_snowflake_quoted_id = ( - modin_frame.data_column_snowflake_quoted_identifiers[-1] - ) - modin_frame = modin_frame.append_column( - "_is_monotonic_increasing", - coalesce( - min_(col(col_to_check) > col(lag_col_snowflake_quoted_id)).over(), - pandas_lit(False), - ), - ) - monotonic_increasing_snowflake_quoted_id = ( - modin_frame.data_column_snowflake_quoted_identifiers[-1] - ) + if columns_to_add in [None, "decreasing"]: + modin_frame = modin_frame.append_column( + "_is_monotonic_decreasing", + coalesce( + min_(col(col_to_check) <= col(lag_col_snowflake_quoted_id)).over(), + pandas_lit(False), + ), + ) + monotonic_decreasing_snowflake_quoted_id = ( + modin_frame.data_column_snowflake_quoted_identifiers[-1] + ) + if columns_to_add in [None, "increasing"]: + modin_frame = modin_frame.append_column( + "_is_monotonic_increasing", + coalesce( + min_(col(col_to_check) >= col(lag_col_snowflake_quoted_id)).over(), + pandas_lit(False), + ), + ) + monotonic_increasing_snowflake_quoted_id = ( + 
modin_frame.data_column_snowflake_quoted_identifiers[-1] + ) data_column_pandas_labels = modin_frame.data_column_pandas_labels data_column_snowflake_quoted_identifiers = ( modin_frame.data_column_snowflake_quoted_identifiers diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 4878c82635a..a8ab6a60c77 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3661,6 +3661,48 @@ def hasnans(): Return True if there are any NaNs. """ + @property + def is_monotonic_decreasing(): + """ + Return boolean if values in the object are monotonically decreasing. + + Returns + ------- + bool + Whether or not the Series is monotonically decreasing. + + Examples + -------- + >>> s = pd.Series([3, 2, 2, 1]) + >>> s.is_monotonic_decreasing + True + + >>> s = pd.Series([1, 2, 3]) + >>> s.is_monotonic_decreasing + False + """ + + @property + def is_monotonic_increasing(): + """ + Return boolean if values in the object are monotonically increasing. + + Returns + ------- + bool + Whether or not the Series is monotonically increasing. + + Examples + -------- + >>> s = pd.Series([1, 2, 2]) + >>> s.is_monotonic_increasing + True + + >>> s = pd.Series([3, 2, 1]) + >>> s.is_monotonic_increasing + False + """ + def isna(): """ Detect missing values. @@ -3721,18 +3763,6 @@ def isnull(): dtype: bool """ - @property - def is_monotonic_increasing(): - """ - Return True if values in the Series are monotonic_increasing. - """ - - @property - def is_monotonic_decreasing(): - """ - Return True if values in the Series are monotonic_decreasing. 
- """ - @property def is_unique(): """ diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index 808489b8917..2682fd2b985 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -398,8 +398,7 @@ def values(self) -> ArrayLike: return self.to_pandas().values @property - @index_not_implemented() - def is_monotonic_increasing(self) -> None: + def is_monotonic_increasing(self) -> bool: """ Return a boolean if the values are equal or increasing. @@ -411,12 +410,20 @@ def is_monotonic_increasing(self) -> None: See Also -------- Index.is_monotonic_decreasing : Check if the values are equal or decreasing + + Examples + -------- + >>> pd.Index([1, 2, 3]).is_monotonic_increasing + True + >>> pd.Index([1, 2, 2]).is_monotonic_increasing + True + >>> pd.Index([1, 3, 2]).is_monotonic_increasing + False """ - # TODO: SNOW-1458134 implement is_monotonic_increasing + return self.to_series().is_monotonic_increasing @property - @index_not_implemented() - def is_monotonic_decreasing(self) -> None: + def is_monotonic_decreasing(self) -> bool: """ Return a boolean if the values are equal or decreasing. 
@@ -428,8 +435,17 @@ def is_monotonic_decreasing(self) -> None:
         See Also
         --------
         Index.is_monotonic_increasing : Check if the values are equal or increasing
+
+        Examples
+        --------
+        >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
+        True
+        >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
+        True
+        >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
+        False
         """
-        # TODO: SNOW-1458134 implement is_monotonic_decreasing
+        return self.to_series().is_monotonic_decreasing
 
     @property
     def is_unique(self) -> bool:
diff --git a/tests/integ/modin/index/test_monotonic.py b/tests/integ/modin/index/test_monotonic.py
new file mode 100644
index 00000000000..5a15e4eb021
--- /dev/null
+++ b/tests/integ/modin/index/test_monotonic.py
@@ -0,0 +1,97 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+
+import modin.pandas as pd
+import numpy as np
+import pandas as native_pd
+import pytest
+
+import snowflake.snowpark.modin.plugin  # noqa: F401
+from tests.integ.modin.sql_counter import sql_count_checker
+
+
+@pytest.mark.parametrize(
+    "values", [[1, 2, 3], [3, 2, 1], [1, 3, 2], [1, 2, 2], [1, np.nan, 3]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_increasing_numbers(values):
+    assert (
+        pd.Index(values).is_monotonic_increasing
+        == native_pd.Index(values).is_monotonic_increasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values", [[3, 2, 1], [1, 2, 3], [3, 1, 2], [2, 2, 1], [3, np.nan, 1]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_decreasing_numbers(values):
+    assert (
+        pd.Index(values).is_monotonic_decreasing
+        == native_pd.Index(values).is_monotonic_decreasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values", [["a", "b", "c"], ["c", "b", "a"], ["a", "c", "b"], ["ca", "cab", "cat"]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_increasing_str(values):
+    assert (
+        pd.Index(values).is_monotonic_increasing
+        == native_pd.Index(values).is_monotonic_increasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values", [["c", "b", "a"], ["a", "b", "c"], ["c", "a", "b"], ["cat", "cab", "ca"]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_decreasing_str(values):
+    assert (
+        pd.Index(values).is_monotonic_decreasing
+        == native_pd.Index(values).is_monotonic_decreasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values,
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1],
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[0, 2, 1]],
+        [
+            native_pd.Timestamp("2018-01-01 00:00:00"),
+            native_pd.NaT,
+            native_pd.Timestamp("2018-01-01 01:20:00"),
+        ],
+    ],
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_increasing_dates(values):
+    assert (
+        pd.DatetimeIndex(values).is_monotonic_increasing
+        == native_pd.DatetimeIndex(values).is_monotonic_increasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1],
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values,
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[2, 0, 1]],
+        [
+            native_pd.Timestamp("2018-01-01 01:20:00"),
+            native_pd.NaT,
+            native_pd.Timestamp("2018-01-01 00:00:00"),
+        ],
+    ],
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_decreasing_dates(values):
+    assert (
+        pd.DatetimeIndex(values).is_monotonic_decreasing
+        == native_pd.DatetimeIndex(values).is_monotonic_decreasing
+    )
diff --git a/tests/integ/modin/series/test_monotonic.py b/tests/integ/modin/series/test_monotonic.py
new file mode 100644
index 00000000000..8726b9d9bd8
--- /dev/null
+++ b/tests/integ/modin/series/test_monotonic.py
@@ -0,0 +1,97 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+
+import modin.pandas as pd
+import numpy as np
+import pandas as native_pd
+import pytest
+
+import snowflake.snowpark.modin.plugin  # noqa: F401
+from tests.integ.modin.sql_counter import sql_count_checker
+
+
+@pytest.mark.parametrize(
+    "values", [[1, 2, 3], [3, 2, 1], [1, 3, 2], [1, 2, 2], [1, np.nan, 3]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_increasing_numbers(values):
+    assert (
+        pd.Series(values).is_monotonic_increasing
+        == native_pd.Series(values).is_monotonic_increasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values", [[3, 2, 1], [1, 2, 3], [3, 1, 2], [2, 2, 1], [3, np.nan, 1]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_decreasing_numbers(values):
+    assert (
+        pd.Series(values).is_monotonic_decreasing
+        == native_pd.Series(values).is_monotonic_decreasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values", [["a", "b", "c"], ["c", "b", "a"], ["a", "c", "b"], ["ca", "cab", "cat"]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_increasing_str(values):
+    assert (
+        pd.Series(values).is_monotonic_increasing
+        == native_pd.Series(values).is_monotonic_increasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values", [["c", "b", "a"], ["a", "b", "c"], ["c", "a", "b"], ["cat", "cab", "ca"]]
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_decreasing_str(values):
+    assert (
+        pd.Series(values).is_monotonic_decreasing
+        == native_pd.Series(values).is_monotonic_decreasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values,
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1],
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[0, 2, 1]],
+        [
+            native_pd.Timestamp("2018-01-01 00:00:00"),
+            native_pd.NaT,
+            native_pd.Timestamp("2018-01-01 01:20:00"),
+        ],
+    ],
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_increasing_dates(values):
+    assert (
+        pd.Series(values).is_monotonic_increasing
+        == native_pd.Series(values).is_monotonic_increasing
+    )
+
+
+@pytest.mark.parametrize(
+    "values",
+    [
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1],
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values,
+        native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[2, 0, 1]],
+        [
+            native_pd.Timestamp("2018-01-01 01:20:00"),
+            native_pd.NaT,
+            native_pd.Timestamp("2018-01-01 00:00:00"),
+        ],
+    ],
+)
+@sql_count_checker(query_count=1)
+def test_monotonic_decreasing_dates(values):
+    assert (
+        pd.Series(values).is_monotonic_decreasing
+        == native_pd.Series(values).is_monotonic_decreasing
+    )
diff --git a/tests/integ/modin/test_unimplemented.py b/tests/integ/modin/test_unimplemented.py
index 8b1d6ef182f..deb5bce6af1 100644
--- a/tests/integ/modin/test_unimplemented.py
+++ b/tests/integ/modin/test_unimplemented.py
@@ -81,8 +81,6 @@ def helper(df):
 # unsupported methods that can only be applied on series
 # This set triggers SeriesDefault.register
 UNSUPPORTED_SERIES_METHODS = [
-    (lambda se: se.is_monotonic_increasing, "property fget:is_monotonic_increasing"),
-    (lambda se: se.is_monotonic_decreasing, "property fget:is_monotonic_decreasing"),
     (lambda df: df.transform(lambda x: x + 1), "transform"),
 ]
 
@@ -180,8 +178,6 @@ def test_unsupported_str_methods(func, func_name, caplog) -> None:
 
 # unsupported methods for Index
 UNSUPPORTED_INDEX_METHODS = [
-    lambda idx: idx.is_monotonic_increasing(),
-    lambda idx: idx.is_monotonic_decreasing(),
     lambda idx: idx.nbytes(),
     lambda idx: idx.memory_usage(),
     lambda idx: idx.delete(),