From b3770cbd273815f2bc742fa475c7483c8141708c Mon Sep 17 00:00:00 2001 From: Rehan Durrani Date: Wed, 28 Aug 2024 15:58:51 -0700 Subject: [PATCH] [SNOW-1458134]: Add support for monotonic checks for Series and Index --- CHANGELOG.md | 2 + .../modin/supported/index_supported.rst | 4 +- .../modin/supported/series_supported.rst | 4 +- src/snowflake/snowpark/modin/pandas/series.py | 16 -- .../compiler/snowflake_query_compiler.py | 151 +++++++++++++++--- .../modin/plugin/docstrings/series.py | 44 +++-- .../snowpark/modin/plugin/extensions/index.py | 32 +++- .../plugin/extensions/series_overrides.py | 26 +++ tests/integ/modin/index/test_monotonic.py | 97 +++++++++++ tests/integ/modin/series/test_monotonic.py | 97 +++++++++++ 10 files changed, 415 insertions(+), 58 deletions(-) create mode 100644 tests/integ/modin/index/test_monotonic.py create mode 100644 tests/integ/modin/series/test_monotonic.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ad1c8e9cb95..7f006cbfe47 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -72,6 +72,8 @@ - Added support for `Index.is_boolean`, `Index.is_integer`, `Index.is_floating`, `Index.is_numeric`, and `Index.is_object`. - Added support for `DatetimeIndex.round`, `DatetimeIndex.floor` and `DatetimeIndex.ceil`. - Added support for `Series.dt.days_in_month` and `Series.dt.daysinmonth`. +- Added support for `Series.is_monotonic_increasing` and `Series.is_monotonic_decreasing`. +- Added support for `Index.is_monotonic_increasing` and `Index.is_monotonic_decreasing`. 
#### Improvements diff --git a/docs/source/modin/supported/index_supported.rst b/docs/source/modin/supported/index_supported.rst index 9db80686454..0c413c201fb 100644 --- a/docs/source/modin/supported/index_supported.rst +++ b/docs/source/modin/supported/index_supported.rst @@ -20,9 +20,9 @@ Attributes +-----------------------------+---------------------------------+----------------------------------------------------+ | ``values`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_increasing`` | N | | +| ``is_monotonic_increasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_decreasing`` | N | | +| ``is_monotonic_decreasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``is_unique`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst index 331be4d0298..618b88d5034 100644 --- a/docs/source/modin/supported/series_supported.rst +++ b/docs/source/modin/supported/series_supported.rst @@ -43,9 +43,9 @@ Attributes +-----------------------------+---------------------------------+----------------------------------------------------+ | ``index`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_decreasing`` | N | | +| ``is_monotonic_decreasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_increasing`` | N | | +| ``is_monotonic_increasing`` | Y | | 
+-----------------------------+---------------------------------+----------------------------------------------------+ | ``is_unique`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index 1ce3ecfc997..3f52a939c49 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -2481,22 +2481,6 @@ def isnull(self): """ return super().isnull() - @property - def is_monotonic_increasing(self): # noqa: RT01, D200 - """ - Return True if values in the Series are monotonic_increasing. - """ - # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions - return self._reduce_dimension(self._query_compiler.is_monotonic_increasing()) - - @property - def is_monotonic_decreasing(self): # noqa: RT01, D200 - """ - Return True if values in the Series are monotonic_decreasing. - """ - # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions - return self._reduce_dimension(self._query_compiler.is_monotonic_decreasing()) - @property def is_unique(self): # noqa: RT01, D200 """ diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index e13c77f8ec3..cad4ff32d27 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -2272,9 +2272,105 @@ def reindex( else: return self._reindex_axis_1(labels=labels, **kwargs) + def is_monotonic_decreasing(self, index: bool) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + decreasing values. + + Parameters + ---------- + index: bool + Whether or not this is called by an `Index` object. 
If False, it is assumed + that this method is called by a `Series` object, and monotonicity is checked + for the first data column. If True, monotonicity is checked for the first index + column. + + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically decreasing. + """ + col_to_check = ( + self._modin_frame.index_column_snowflake_quoted_identifiers[0] + if index + else self._modin_frame.data_column_snowflake_quoted_identifiers[0] + ) + ( + new_qc, + _, + monotonic_decreasing_snowflake_quoted_identifier, + ) = self._add_columns_for_monotonicity_checks( + col_to_check=col_to_check, columns_to_add="decreasing" + ) + new_modin_frame = new_qc._modin_frame + return SnowflakeQueryCompiler( + InternalFrame.create( + ordered_dataframe=new_modin_frame.ordered_dataframe.limit( + n=1, sort=False + ), + data_column_pandas_index_names=new_modin_frame.data_column_pandas_index_names, + data_column_pandas_labels=["monotonic_column"], + data_column_snowflake_quoted_identifiers=[ + monotonic_decreasing_snowflake_quoted_identifier + ], + index_column_pandas_labels=new_modin_frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=new_modin_frame.index_column_snowflake_quoted_identifiers, + data_column_types=None, + index_column_types=None, + ) + ) + + def is_monotonic_increasing(self, index: bool) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + increasing values. + + Parameters + ---------- + index: bool + Whether or not this is called by an `Index` object. If False, it is assumed + that this method is called by a `Series` object, and monotonicity is checked + for the first data column. If True, monotonicity is checked for the first index + column. + + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically increasing. 
+ """ + col_to_check = ( + self._modin_frame.index_column_snowflake_quoted_identifiers[0] + if index + else self._modin_frame.data_column_snowflake_quoted_identifiers[0] + ) + ( + new_qc, + monotonic_increasing_snowflake_quoted_identifier, + _, + ) = self._add_columns_for_monotonicity_checks( + col_to_check=col_to_check, columns_to_add="increasing" + ) + new_modin_frame = new_qc._modin_frame + return SnowflakeQueryCompiler( + InternalFrame.create( + ordered_dataframe=new_modin_frame.ordered_dataframe.limit( + n=1, sort=False + ), + data_column_pandas_index_names=new_modin_frame.data_column_pandas_index_names, + data_column_pandas_labels=["monotonic_column"], + data_column_snowflake_quoted_identifiers=[ + monotonic_increasing_snowflake_quoted_identifier + ], + index_column_pandas_labels=new_modin_frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=new_modin_frame.index_column_snowflake_quoted_identifiers, + data_column_types=None, + index_column_types=None, + ) + ) + def _add_columns_for_monotonicity_checks( - self, col_to_check: str - ) -> tuple["SnowflakeQueryCompiler", str, str]: + self, col_to_check: str, columns_to_add: Optional[str] = None + ) -> tuple["SnowflakeQueryCompiler", Optional[str], Optional[str]]: """ Adds columns that check for monotonicity (increasing or decreasing) in the specified column. @@ -2283,6 +2379,8 @@ def _add_columns_for_monotonicity_checks( ---------- col_to_check : str The Snowflake quoted identifier for the column whose monotonicity to check. + columns_to_add : str, optional + "increasing" or "decreasing" to add only that column; None adds both. 
Returns ------- @@ -2293,9 +2391,16 @@ def _add_columns_for_monotonicity_checks( """ self._raise_not_implemented_error_for_timedelta() + if columns_to_add not in (None, "increasing", "decreasing"): + raise ValueError( + "columns_to_add must be None, 'increasing', or 'decreasing'; " + f"got {columns_to_add!r}" + ) modin_frame = self._modin_frame modin_frame = modin_frame.ensure_row_position_column() row_position_column = modin_frame.row_position_snowflake_quoted_identifier + monotonic_decreasing_snowflake_quoted_id = None + monotonic_increasing_snowflake_quoted_id = None modin_frame = modin_frame.append_column( "_index_lag_col", lag(col_to_check).over(Window.order_by(row_position_column)), @@ -2303,26 +2408,28 @@ def _add_columns_for_monotonicity_checks( lag_col_snowflake_quoted_id = ( modin_frame.data_column_snowflake_quoted_identifiers[-1] ) - modin_frame = modin_frame.append_column( - "_is_monotonic_decreasing", - coalesce( - min_(col(col_to_check) < col(lag_col_snowflake_quoted_id)).over(), - pandas_lit(False), - ), - ) - monotonic_decreasing_snowflake_quoted_id = ( - modin_frame.data_column_snowflake_quoted_identifiers[-1] - ) - modin_frame = modin_frame.append_column( - "_is_monotonic_increasing", - coalesce( - min_(col(col_to_check) > col(lag_col_snowflake_quoted_id)).over(), - pandas_lit(False), - ), - ) - monotonic_increasing_snowflake_quoted_id = ( - modin_frame.data_column_snowflake_quoted_identifiers[-1] - ) + if columns_to_add in [None, "decreasing"]: + modin_frame = modin_frame.append_column( + "_is_monotonic_decreasing", + coalesce( + min_(col(col_to_check) <= col(lag_col_snowflake_quoted_id)).over(), + pandas_lit(False), + ), + ) + monotonic_decreasing_snowflake_quoted_id = ( + modin_frame.data_column_snowflake_quoted_identifiers[-1] + ) + if columns_to_add in [None, "increasing"]: + modin_frame = modin_frame.append_column( + "_is_monotonic_increasing", + coalesce( + min_(col(col_to_check) >= col(lag_col_snowflake_quoted_id)).over(), + pandas_lit(False), + ), + ) + monotonic_increasing_snowflake_quoted_id = ( + 
modin_frame.data_column_snowflake_quoted_identifiers[-1] + ) data_column_pandas_labels = modin_frame.data_column_pandas_labels data_column_snowflake_quoted_identifiers = ( modin_frame.data_column_snowflake_quoted_identifiers diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 6e48a7e57f3..dbf22faa912 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -3659,6 +3659,38 @@ def hasnans(): Return True if there are any NaNs. """ + @property + def is_monotonic_increasing(): + """ + Return boolean if values in the object are monotonically increasing. + + Examples + -------- + >>> s = pd.Series([1, 2, 2]) + >>> s.is_monotonic_increasing + True + + >>> s = pd.Series([3, 2, 1]) + >>> s.is_monotonic_increasing + False + """ + + @property + def is_monotonic_decreasing(): + """ + Return boolean if values in the object are monotonically decreasing. + + Examples + -------- + >>> s = pd.Series([3, 2, 2, 1]) + >>> s.is_monotonic_decreasing + True + + >>> s = pd.Series([1, 2, 3]) + >>> s.is_monotonic_decreasing + False + """ + def isna(): """ Detect missing values. @@ -3719,18 +3751,6 @@ def isnull(): dtype: bool """ - @property - def is_monotonic_increasing(): - """ - Return True if values in the Series are monotonic_increasing. - """ - - @property - def is_monotonic_decreasing(): - """ - Return True if values in the Series are monotonic_decreasing. 
- """ + @property def is_unique(): """ diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index bbd415536af..b7621786e58 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -398,7 +398,6 @@ def values(self) -> ArrayLike: return self.to_pandas().values @property - @index_not_implemented() - def is_monotonic_increasing(self) -> None: + def is_monotonic_increasing(self) -> bool: """ Return a boolean if the values are equal or increasing. @@ -411,11 +410,23 @@ def is_monotonic_increasing(self) -> None: See Also -------- Index.is_monotonic_decreasing : Check if the values are equal or decreasing + + Examples + -------- + >>> pd.Index([1, 2, 3]).is_monotonic_increasing + True + >>> pd.Index([1, 2, 2]).is_monotonic_increasing + True + >>> pd.Index([1, 3, 2]).is_monotonic_increasing + False """ - # TODO: SNOW-1458134 implement is_monotonic_increasing + return ( + self._query_compiler.is_monotonic_increasing(index=True) + .to_pandas() + .squeeze() + ) @property - @index_not_implemented() - def is_monotonic_decreasing(self) -> None: + def is_monotonic_decreasing(self) -> bool: """ Return a boolean if the values are equal or decreasing. 
@@ -428,8 +439,21 @@ def is_monotonic_decreasing(self) -> None: See Also -------- Index.is_monotonic_increasing : Check if the values are equal or increasing + + Examples + -------- + >>> pd.Index([3, 2, 1]).is_monotonic_decreasing + True + >>> pd.Index([3, 2, 2]).is_monotonic_decreasing + True + >>> pd.Index([3, 1, 2]).is_monotonic_decreasing + False """ - # TODO: SNOW-1458134 implement is_monotonic_decreasing + return ( + self._query_compiler.is_monotonic_decreasing(index=True) + .to_pandas() + .squeeze() + ) @property def is_unique(self) -> bool: diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index 0afea30e29a..0e381cc1a19 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -164,3 +164,29 @@ def plot( @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 pass # pragma: no cover + + +@register_series_accessor("is_monotonic_increasing") +@property +@snowpark_pandas_telemetry_method_decorator +def is_monotonic_increasing(self): # noqa: RT01, D200 + """ + Return True if values in the Series are monotonic_increasing. + """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._reduce_dimension( + self._query_compiler.is_monotonic_increasing(index=False) + ) + + +@register_series_accessor("is_monotonic_decreasing") +@property +@snowpark_pandas_telemetry_method_decorator +def is_monotonic_decreasing(self): # noqa: RT01, D200 + """ + Return True if values in the Series are monotonic_decreasing. 
+ """ + # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions + return self._reduce_dimension( + self._query_compiler.is_monotonic_decreasing(index=False) + ) diff --git a/tests/integ/modin/index/test_monotonic.py b/tests/integ/modin/index/test_monotonic.py new file mode 100644 index 00000000000..5a15e4eb021 --- /dev/null +++ b/tests/integ/modin/index/test_monotonic.py @@ -0,0 +1,97 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.sql_counter import sql_count_checker + + +@pytest.mark.parametrize( + "values", [[1, 2, 3], [3, 2, 1], [1, 3, 2], [1, 2, 2], [1, np.NaN, 3]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_numbers(values): + assert ( + pd.Index(values).is_monotonic_increasing + == native_pd.Index(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [[3, 2, 1], [1, 2, 3], [3, 1, 2], [2, 2, 1], [3, np.NaN, 1]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_numbers(values): + assert ( + pd.Index(values).is_monotonic_decreasing + == native_pd.Index(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", [["a", "b", "c"], ["c", "b", "a"], ["a", "c", "b"], ["ca", "cab", "cat"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_str(values): + assert ( + pd.Index(values).is_monotonic_increasing + == native_pd.Index(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [["c", "b", "a"], ["a", "b", "c"], ["c", "a", "b"], ["cat", "cab", "ca"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_str(values): + assert ( + pd.Index(values).is_monotonic_decreasing + == native_pd.Index(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", 
end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[0, 2, 1]], + [ + native_pd.Timestamp("2018-01-01 00:00:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 01:20:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_dates(values): + assert ( + pd.DatetimeIndex(values).is_monotonic_increasing + == native_pd.DatetimeIndex(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[2, 0, 1]], + [ + native_pd.Timestamp("2018-01-01 01:20:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 00:00:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_dates(values): + assert ( + pd.DatetimeIndex(values).is_monotonic_decreasing + == native_pd.DatetimeIndex(values).is_monotonic_decreasing + ) diff --git a/tests/integ/modin/series/test_monotonic.py b/tests/integ/modin/series/test_monotonic.py new file mode 100644 index 00000000000..8726b9d9bd8 --- /dev/null +++ b/tests/integ/modin/series/test_monotonic.py @@ -0,0 +1,97 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.sql_counter import sql_count_checker + + +@pytest.mark.parametrize( + "values", [[1, 2, 3], [3, 2, 1], [1, 3, 2], [1, 2, 2], [1, np.NaN, 3]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_numbers(values): + assert ( + pd.Series(values).is_monotonic_increasing + == native_pd.Series(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [[3, 2, 1], [1, 2, 3], [3, 1, 2], [2, 2, 1], [3, np.NaN, 1]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_numbers(values): + assert ( + pd.Series(values).is_monotonic_decreasing + == native_pd.Series(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", [["a", "b", "c"], ["c", "b", "a"], ["a", "c", "b"], ["ca", "cab", "cat"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_str(values): + assert ( + pd.Series(values).is_monotonic_increasing + == native_pd.Series(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [["c", "b", "a"], ["a", "b", "c"], ["c", "a", "b"], ["cat", "cab", "ca"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_str(values): + assert ( + pd.Series(values).is_monotonic_decreasing + == native_pd.Series(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[0, 2, 1]], + [ + native_pd.Timestamp("2018-01-01 00:00:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 01:20:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_dates(values): + assert ( + pd.Series(values).is_monotonic_increasing + == 
native_pd.Series(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[2, 0, 1]], + [ + native_pd.Timestamp("2018-01-01 01:20:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 00:00:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_dates(values): + assert ( + pd.Series(values).is_monotonic_decreasing + == native_pd.Series(values).is_monotonic_decreasing + )