diff --git a/src/snowflake/snowpark/modin/pandas/indexing.py b/src/snowflake/snowpark/modin/pandas/indexing.py index c3c4d7b7055..19c8a1b6074 100644 --- a/src/snowflake/snowpark/modin/pandas/indexing.py +++ b/src/snowflake/snowpark/modin/pandas/indexing.py @@ -1311,16 +1311,12 @@ def __setitem__( set_as_coords = is_row_key_df and is_col_key_df new_qc = self.qc.set_2d_positional( - ( - row_loc._query_compiler - if isinstance(row_loc, BasePandasDataset) - else row_loc - ), - ( - col_loc._query_compiler - if isinstance(col_loc, BasePandasDataset) - else col_loc - ), + row_loc._query_compiler + if isinstance(row_loc, BasePandasDataset) + else row_loc, + col_loc._query_compiler + if isinstance(col_loc, BasePandasDataset) + else col_loc, item._query_compiler if isinstance(item, BasePandasDataset) else item, set_as_coords, is_item_series, diff --git a/src/snowflake/snowpark/modin/plugin/__init__.py b/src/snowflake/snowpark/modin/plugin/__init__.py index 63c7a874a1e..d3ac525572a 100644 --- a/src/snowflake/snowpark/modin/plugin/__init__.py +++ b/src/snowflake/snowpark/modin/plugin/__init__.py @@ -56,27 +56,6 @@ DocModule.put(docstrings.__name__) -# Configure Modin engine so it detects our Snowflake I/O classes. -# This is necessary to run even basic code like Series/DataFrame constructors, -# as these factories define the `from_pandas` method for each Modin backend. 
- -from modin.config import Engine # isort: skip # noqa: E402 - -# Secretly insert our factory class into Modin so the dispatcher can find it -from modin.core.execution.dispatching.factories import ( # isort: skip # noqa: E402 - factories as modin_factories, -) - -from snowflake.snowpark.modin.core.execution.dispatching.factories.factories import ( # isort: skip # noqa: E402 - PandasOnSnowflakeFactory, -) - -modin_factories.PandasOnSnowflakeFactory = PandasOnSnowflakeFactory - -Engine.add_option("Snowflake") -Engine.put("Snowflake") - - # We cannot call ModinDocModule.put directly because it will produce a call to `importlib.reload` # that will overwrite our extensions. We instead directly call the _inherit_docstrings annotation # See https://github.com/modin-project/modin/issues/7122 diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 82ac0db1042..8f0c3715e9b 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -759,14 +759,6 @@ def copy(self) -> "SnowflakeQueryCompiler": qc.snowpark_pandas_api_calls = self.snowpark_pandas_api_calls.copy() return qc - def to_list(self) -> list: - """ - Return a native Python list of the values. - - Only called if the frontend object was a Series. - """ - return self.to_pandas().squeeze().to_list() - def series_to_dict(self, into=dict) -> dict: # type: ignore """ Convert the Series to a dictionary. 
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
index d3bacd4e963..4a8e9e0d623 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
@@ -88,6 +88,59 @@ def decorator(base_method: Any):
     return decorator
 
 
+# Upstream modin's Series.__getattr__ additionally tests `key in self.index`, which costs an
+# extra query every time a missing attribute is requested.
+# __getattr__ is the very hook that resolves extension methods, so it cannot be overridden
+# through the extensions module; it is assigned onto the class directly after the definition.
+def __getattr__(self, key):
+    """
+    Resolve attribute access for `key`, falling back to ``self[key]``.
+
+    Parameters
+    ----------
+    key : hashable
+        Name being looked up.
+
+    Returns
+    -------
+    Any
+        Result of the attribute lookup, or ``self[key]`` when that lookup fails.
+
+    Raises
+    ------
+    AttributeError
+        The original lookup error, re-raised when `key` is in ``_ATTRS_NO_LOOKUP``,
+        when ``self[key]`` raises, or when ``self[key]`` is an empty `Series`.
+
+    Notes
+    -----
+    `__getattribute__` is tried first; only if it fails is `key` looked up among
+    the `Series` fields.
+    """
+    # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions
+    from modin.pandas.base import _ATTRS_NO_LOOKUP
+    from modin.pandas.series import _SERIES_EXTENSIONS_
+
+    try:
+        return _SERIES_EXTENSIONS_.get(key, object.__getattribute__(self, key))
+    except AttributeError as err:
+        if key in _ATTRS_NO_LOOKUP:
+            raise err
+        try:
+            item = self[key]
+            # An empty Series result is treated the same as a failed lookup.
+            missing = isinstance(item, Series) and item.empty
+        except Exception:
+            # Any failure while evaluating self[key] surfaces as the original AttributeError.
+            raise err
+        if missing:
+            raise err
+        return item
+
+
+Series.__getattr__ = __getattr__
+
+
 # === UNIMPLEMENTED METHODS ===
 # The following methods are not implemented in Snowpark pandas, and must be overridden on the
 # frontend.
These methods fall into a few categories: diff --git a/tests/integ/modin/series/test_getattr.py b/tests/integ/modin/series/test_getattr.py index 908d2f044d7..56208fef974 100644 --- a/tests/integ/modin/series/test_getattr.py +++ b/tests/integ/modin/series/test_getattr.py @@ -16,8 +16,7 @@ @pytest.mark.parametrize( "name, expected_query_count, expected_join_count", [ - # Upstream Modin performs a check against self.index["a"] rather than self["a"], incurring an additional query. - ("a", 3, 2), + ("a", 2, 2), ("index", 1, 0), ("mean", 0, 0), ],