Skip to content

Commit

Permalink
override getattr
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-joshi committed Sep 4, 2024
1 parent 13188da commit 3958304
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 41 deletions.
16 changes: 6 additions & 10 deletions src/snowflake/snowpark/modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1311,16 +1311,12 @@ def __setitem__(
set_as_coords = is_row_key_df and is_col_key_df

new_qc = self.qc.set_2d_positional(
(
row_loc._query_compiler
if isinstance(row_loc, BasePandasDataset)
else row_loc
),
(
col_loc._query_compiler
if isinstance(col_loc, BasePandasDataset)
else col_loc
),
row_loc._query_compiler
if isinstance(row_loc, BasePandasDataset)
else row_loc,
col_loc._query_compiler
if isinstance(col_loc, BasePandasDataset)
else col_loc,
item._query_compiler if isinstance(item, BasePandasDataset) else item,
set_as_coords,
is_item_series,
Expand Down
21 changes: 0 additions & 21 deletions src/snowflake/snowpark/modin/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,27 +56,6 @@

DocModule.put(docstrings.__name__)

# Configure Modin engine so it detects our Snowflake I/O classes.
# This is necessary to run even basic code like Series/DataFrame constructors,
# as these factories define the `from_pandas` method for each Modin backend.

from modin.config import Engine # isort: skip # noqa: E402

# Secretly insert our factory class into Modin so the dispatcher can find it
from modin.core.execution.dispatching.factories import ( # isort: skip # noqa: E402
factories as modin_factories,
)

from snowflake.snowpark.modin.core.execution.dispatching.factories.factories import ( # isort: skip # noqa: E402
PandasOnSnowflakeFactory,
)

modin_factories.PandasOnSnowflakeFactory = PandasOnSnowflakeFactory

Engine.add_option("Snowflake")
Engine.put("Snowflake")


# We cannot call ModinDocModule.put directly because it will produce a call to `importlib.reload`
# that will overwrite our extensions. We instead directly call the _inherit_docstrings annotation
# See https://github.com/modin-project/modin/issues/7122
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -759,14 +759,6 @@ def copy(self) -> "SnowflakeQueryCompiler":
qc.snowpark_pandas_api_calls = self.snowpark_pandas_api_calls.copy()
return qc

def to_list(self) -> list:
    """
    Return a native Python list of the values.

    Only called if the frontend object was a Series.

    Returns
    -------
    list
        The values of the materialized result, in order.
    """
    native = self.to_pandas()
    # Collapse only the column axis. A bare ``squeeze()`` also collapses the
    # row axis, so a single-element result would become a scalar, and scalars
    # have no ``to_list`` method.
    if native.ndim == 2:
        native = native.squeeze(axis=1)
    return native.to_list()

def series_to_dict(self, into=dict) -> dict: # type: ignore
"""
Convert the Series to a dictionary.
Expand Down
44 changes: 44 additions & 0 deletions src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,50 @@ def decorator(base_method: Any):
return decorator


# Upstream modin has an extra check for `key in self.index`, which produces an extra query
# when an attribute is not present.
# Because __getattr__ itself is responsible for resolving extension methods, we cannot override
# this method via the extensions module, and instead have to assign the replacement directly
# onto the Series class.
def __getattr__(self, key):
    """
    Return item identified by `key`.

    Parameters
    ----------
    key : hashable
        Key to get.

    Returns
    -------
    Any

    Raises
    ------
    AttributeError
        If `key` is neither a registered extension, a regular attribute,
        nor a label that yields a non-empty result from ``self[key]``.

    Notes
    -----
    Registered extensions are consulted first, then `__getattribute__`.
    If both fail, try to get `key` from `Series` fields.
    """
    # TODO: SNOW-1063347: Modin upgrade - modin.pandas.Series functions
    from modin.pandas.base import _ATTRS_NO_LOOKUP
    from modin.pandas.series import _SERIES_EXTENSIONS_

    # Look in the extension registry before touching `__getattribute__`.
    # Passing `object.__getattribute__(self, key)` as the default of
    # `dict.get` evaluates it eagerly, so an extension that exists only in
    # the registry would raise AttributeError before the registry is read.
    if key in _SERIES_EXTENSIONS_:
        return _SERIES_EXTENSIONS_[key]

    try:
        return object.__getattribute__(self, key)
    except AttributeError as err:
        # Names in _ATTRS_NO_LOOKUP must never fall back to a label lookup.
        if key in _ATTRS_NO_LOOKUP:
            raise err
        try:
            value = self[key]
        except Exception:
            # We want to raise err if self[key] raises any kind of exception
            raise err
        # An empty Series result is treated as a missing attribute.
        if isinstance(value, Series) and value.empty:
            raise err
        return value


# Install the override by plain assignment on the class.
Series.__getattr__ = __getattr__


# === UNIMPLEMENTED METHODS ===
# The following methods are not implemented in Snowpark pandas, and must be overridden on the
# frontend. These methods fall into a few categories:
Expand Down
3 changes: 1 addition & 2 deletions tests/integ/modin/series/test_getattr.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,7 @@
@pytest.mark.parametrize(
"name, expected_query_count, expected_join_count",
[
# Upstream Modin performs a check against self.index["a"] rather than self["a"], incurring an additional query.
("a", 3, 2),
("a", 2, 2),
("index", 1, 0),
("mean", 0, 0),
],
Expand Down

0 comments on commit 3958304

Please sign in to comment.