Skip to content

Commit

Permalink
Merge branch 'main' into nkumar-SNOW-1637945-attrs
Browse files Browse the repository at this point in the history
  • Loading branch information
sfc-gh-nkumar authored Aug 30, 2024
2 parents e47d1a5 + 25c1006 commit ab837ef
Show file tree
Hide file tree
Showing 24 changed files with 2,332 additions and 119 deletions.
8 changes: 6 additions & 2 deletions src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,9 @@ def create_table_as_select_statement(
max_data_extension_time: Optional[int] = None,
change_tracking: Optional[bool] = None,
copy_grants: bool = False,
*,
use_scoped_temp_objects: bool = False,
is_generated: bool = False,
) -> str:
column_definition_sql = (
f"{LEFT_PARENTHESIS}{column_definition}{RIGHT_PARENTHESIS}"
Expand All @@ -877,8 +880,9 @@ def create_table_as_select_statement(
}
)
return (
f"{CREATE}{OR + REPLACE if replace else EMPTY_STRING} {table_type.upper()} {TABLE}"
f"{IF + NOT + EXISTS if not replace and not error else EMPTY_STRING} "
f"{CREATE}{OR + REPLACE if replace else EMPTY_STRING}"
f" {(get_temp_type_for_object(use_scoped_temp_objects, is_generated) if table_type.lower() in TEMPORARY_STRING_SET else table_type).upper()} "
f"{TABLE}{IF + NOT + EXISTS if not replace and not error else EMPTY_STRING} "
f"{table_name}{column_definition_sql}{cluster_by_clause}{options_statement}"
f"{COPY_GRANTS if copy_grants else EMPTY_STRING}{comment_sql} {AS}{project_statement([], child)}"
)
Expand Down
2 changes: 2 additions & 0 deletions src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,8 @@ def get_create_table_as_select_plan(child: SnowflakePlan, replace, error):
max_data_extension_time=max_data_extension_time,
change_tracking=change_tracking,
copy_grants=copy_grants,
use_scoped_temp_objects=use_scoped_temp_objects,
is_generated=is_generated,
),
child,
source_plan,
Expand Down
79 changes: 71 additions & 8 deletions src/snowflake/snowpark/modin/pandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,16 @@
timedelta_range,
)

import modin.pandas

# TODO: SNOW-851745 make sure add all Snowpark pandas API general functions
from modin.pandas import plotting # type: ignore[import]

from snowflake.snowpark.modin.pandas.dataframe import DataFrame
from snowflake.snowpark.modin.pandas.api.extensions import (
register_dataframe_accessor,
register_series_accessor,
)
from snowflake.snowpark.modin.pandas.dataframe import _DATAFRAME_EXTENSIONS_, DataFrame
from snowflake.snowpark.modin.pandas.general import (
concat,
crosstab,
Expand Down Expand Up @@ -140,15 +146,15 @@
read_xml,
to_pickle,
)
from snowflake.snowpark.modin.pandas.series import Series
from snowflake.snowpark.modin.pandas.series import _SERIES_EXTENSIONS_, Series
from snowflake.snowpark.modin.plugin._internal.session import SnowpandasSessionHolder
from snowflake.snowpark.modin.plugin._internal.telemetry import (
try_add_telemetry_to_attribute,
)

# The extensions assigned to this module
_PD_EXTENSIONS_: dict = {}

# base needs to be re-exported in order to properly override docstrings for BasePandasDataset
# moving this import higher prevents sphinx from building documentation (??)
from snowflake.snowpark.modin.pandas import base # isort: skip # noqa: E402,F401

import snowflake.snowpark.modin.plugin.extensions.pd_extensions as pd_extensions # isort: skip # noqa: E402,F401
import snowflake.snowpark.modin.plugin.extensions.pd_overrides # isort: skip # noqa: E402,F401
Expand All @@ -157,12 +163,71 @@
DatetimeIndex,
TimedeltaIndex,
)

# NOTE: these snapshots must be taken before any override modules below are
# imported, so they reflect the attribute sets of the unmodified classes.
_attrs_defined_on_modin_base = set(dir(modin.pandas.base.BasePandasDataset))
# TODO: SNOW-1063347 revisit when series.py is removed
_attrs_defined_on_series = set(dir(Series))
# TODO: SNOW-1063346 revisit when dataframe.py is removed
_attrs_defined_on_dataframe = set(dir(DataFrame))

# base overrides occur before subclass overrides in case subclasses override a base method
import snowflake.snowpark.modin.plugin.extensions.base_extensions # isort: skip # noqa: E402,F401
import snowflake.snowpark.modin.plugin.extensions.base_overrides # isort: skip # noqa: E402,F401
import snowflake.snowpark.modin.plugin.extensions.dataframe_extensions # isort: skip # noqa: E402,F401
import snowflake.snowpark.modin.plugin.extensions.dataframe_overrides # isort: skip # noqa: E402,F401
import snowflake.snowpark.modin.plugin.extensions.series_extensions # isort: skip # noqa: E402,F401
import snowflake.snowpark.modin.plugin.extensions.series_overrides # isort: skip # noqa: E402,F401

# Wrap telemetry around every public Series method that satisfies all of:
#   1. it is defined directly on upstream BasePandasDataset
#      (_attrs_defined_on_modin_base);
#   2. it is also present on the (vendored) Series class (this will change);
#   3. it has not been replaced via the extensions mechanism;
#   4. its name does not start with an underscore.
#
# TODO: SNOW-1063347
# Since we still use the vendored version of Series and the overrides for the
# top-level namespace haven't been performed yet, we need to set properties on
# the vendored version.
_base_telemetry_added_attrs = set()

# Snapshot the extension registry first: registering accessors below mutates it.
_series_extension_names = set(_SERIES_EXTENSIONS_)
for _name in dir(Series):
    if _name.startswith("_"):
        continue
    if _name not in _attrs_defined_on_modin_base:
        continue
    if _name not in _attrs_defined_on_series:
        continue
    if _name in _series_extension_names:
        continue
    _wrapped = try_add_telemetry_to_attribute(_name, getattr(Series, _name))
    register_series_accessor(_name)(_wrapped)
    _base_telemetry_added_attrs.add(_name)

# TODO: SNOW-1063346
# Since we still use the vendored version of DataFrame and the overrides for the
# top-level namespace haven't been performed yet, we need to set properties on
# the vendored version.
# Snapshot the extension registry first: registering accessors below mutates it.
_dataframe_extension_names = set(_DATAFRAME_EXTENSIONS_)
for _name in dir(DataFrame):
    if _name.startswith("_"):
        continue
    if _name not in _attrs_defined_on_modin_base:
        continue
    if _name not in _attrs_defined_on_dataframe:
        continue
    if _name in _dataframe_extension_names:
        continue
    _original = getattr(DataFrame, _name)
    # If telemetry was already added via Series, register the override but don't
    # re-wrap the method in the telemetry annotation. Without this check some
    # methods would double-report telemetry.
    if _name in _base_telemetry_added_attrs:
        _replacement = _original
    else:
        _replacement = try_add_telemetry_to_attribute(_name, _original)
    register_dataframe_accessor(_name)(_replacement)
    _base_telemetry_added_attrs.add(_name)


def __getattr__(name: str) -> Any:
"""
Expand Down Expand Up @@ -220,7 +285,6 @@ def __getattr__(name: str) -> Any:
"date_range",
"Index",
"MultiIndex",
"Series",
"bdate_range",
"period_range",
"DatetimeIndex",
Expand Down Expand Up @@ -318,8 +382,7 @@ def __getattr__(name: str) -> Any:
# Manually re-export the members of the pd_extensions namespace, which are not declared in __all__.
_EXTENSION_ATTRS = ["read_snowflake", "to_snowflake", "to_snowpark", "to_pandas"]
# We also need to re-export native_pd.offsets, since modin.pandas doesn't re-export it.
# snowflake.snowpark.pandas.base also needs to be re-exported to make docstring overrides for BasePandasDataset work.
_ADDITIONAL_ATTRS = ["offsets", "base"]
_ADDITIONAL_ATTRS = ["offsets"]

# This code should eventually be moved into the `snowflake.snowpark.modin.plugin` module instead.
# Currently, trying to do so would result in incorrect results because `snowflake.snowpark.modin.pandas`
Expand Down
6 changes: 4 additions & 2 deletions src/snowflake/snowpark/modin/pandas/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import numpy as np
import pandas
from modin.pandas.accessor import CachedAccessor, SparseFrameAccessor
from modin.pandas.base import BasePandasDataset

# from . import _update_engine
from modin.pandas.iterator import PartitionIterator
Expand Down Expand Up @@ -73,7 +74,6 @@
from pandas.util._validators import validate_bool_kwarg

from snowflake.snowpark.modin import pandas as pd
from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset
from snowflake.snowpark.modin.pandas.groupby import (
DataFrameGroupBy,
validate_groupby_args,
Expand All @@ -91,12 +91,14 @@
replace_external_data_keys_with_empty_pandas_series,
replace_external_data_keys_with_query_compiler,
)
from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta
from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated
from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
from snowflake.snowpark.modin.plugin.utils.error_message import (
ErrorMessage,
dataframe_not_implemented,
)
from snowflake.snowpark.modin.plugin.utils.frontend_constants import _ATTRS_NO_LOOKUP
from snowflake.snowpark.modin.plugin.utils.warning_message import (
SET_DATAFRAME_ATTRIBUTE_WARNING,
WarningMessage,
Expand Down Expand Up @@ -136,7 +138,7 @@
],
apilink="pandas.DataFrame",
)
class DataFrame(BasePandasDataset):
class DataFrame(BasePandasDataset, metaclass=TelemetryMeta):
_pandas_class = pandas.DataFrame

def __init__(
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import numpy as np
import pandas
import pandas.core.common as common
from modin.pandas.base import BasePandasDataset
from pandas import IntervalIndex, NaT, Timedelta, Timestamp
from pandas._libs import NaTType, lib
from pandas._libs.tslibs import to_offset
Expand Down Expand Up @@ -61,7 +62,6 @@

# add this line to make doctests runnable
from snowflake.snowpark.modin import pandas as pd # noqa: F401
from snowflake.snowpark.modin.pandas.base import BasePandasDataset
from snowflake.snowpark.modin.pandas.dataframe import DataFrame
from snowflake.snowpark.modin.pandas.series import Series
from snowflake.snowpark.modin.pandas.utils import (
Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/modin/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

import numpy as np
import pandas
from modin.pandas.base import BasePandasDataset
from pandas._libs.tslibs import Resolution, parsing
from pandas._typing import AnyArrayLike, Scalar
from pandas.api.types import is_bool, is_list_like
Expand All @@ -58,7 +59,6 @@

import snowflake.snowpark.modin.pandas as pd
import snowflake.snowpark.modin.pandas.utils as frontend_utils
from snowflake.snowpark.modin.pandas.base import BasePandasDataset
from snowflake.snowpark.modin.pandas.dataframe import DataFrame
from snowflake.snowpark.modin.pandas.series import (
SERIES_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE_ERROR_MESSAGE,
Expand Down
6 changes: 4 additions & 2 deletions src/snowflake/snowpark/modin/pandas/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import numpy.typing as npt
import pandas
from modin.pandas.accessor import CachedAccessor, SparseAccessor
from modin.pandas.base import BasePandasDataset
from modin.pandas.iterator import PartitionIterator
from pandas._libs.lib import NoDefault, is_integer, no_default
from pandas._typing import (
Expand All @@ -51,17 +52,18 @@
from pandas.core.series import _coerce_method
from pandas.util._validators import validate_bool_kwarg

from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset
from snowflake.snowpark.modin.pandas.utils import (
from_pandas,
is_scalar,
try_convert_index_to_native,
)
from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta
from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike
from snowflake.snowpark.modin.plugin.utils.error_message import (
ErrorMessage,
series_not_implemented,
)
from snowflake.snowpark.modin.plugin.utils.frontend_constants import _ATTRS_NO_LOOKUP
from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage
from snowflake.snowpark.modin.utils import (
MODIN_UNNAMED_SERIES_LABEL,
Expand Down Expand Up @@ -108,7 +110,7 @@
],
apilink="pandas.Series",
)
class Series(BasePandasDataset):
class Series(BasePandasDataset, metaclass=TelemetryMeta):
_pandas_class = pandas.Series
__array_priority__ = pandas.Series.__array_priority__

Expand Down
3 changes: 1 addition & 2 deletions src/snowflake/snowpark/modin/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,9 @@ def is_scalar(obj):
bool
True if given object is scalar and False otherwise.
"""
from modin.pandas.base import BasePandasDataset
from pandas.api.types import is_scalar as pandas_is_scalar

from .base import BasePandasDataset

return not isinstance(obj, BasePandasDataset) and pandas_is_scalar(obj)


Expand Down
26 changes: 17 additions & 9 deletions src/snowflake/snowpark/modin/plugin/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,23 @@
import modin.utils # type: ignore[import] # isort: skip # noqa: E402
import modin.pandas.series_utils # type: ignore[import] # isort: skip # noqa: E402

modin.utils._inherit_docstrings(
docstrings.series_utils.StringMethods,
overwrite_existing=True,
)(modin.pandas.series_utils.StringMethods)

modin.utils._inherit_docstrings(
docstrings.series_utils.CombinedDatetimelikeProperties,
overwrite_existing=True,
)(modin.pandas.series_utils.DatetimeProperties)
# TODO: SNOW-1643979 pull in fixes for
# https://github.com/modin-project/modin/issues/7113 and https://github.com/modin-project/modin/issues/7134
# Upstream Modin has issues with certain docstring generation edge cases, so we
# use our own _inherit_docstrings instead.
_inherit_docstrings = snowflake.snowpark.modin.utils._inherit_docstrings

# Pairs of (docstring source class, Modin target class) whose docstrings are
# overwritten with our versions.
inherit_modules = [
    (docstrings.base.BasePandasDataset, modin.pandas.base.BasePandasDataset),
    (docstrings.series_utils.StringMethods, modin.pandas.series_utils.StringMethods),
    (
        docstrings.series_utils.CombinedDatetimelikeProperties,
        modin.pandas.series_utils.DatetimeProperties,
    ),
]

for _doc_source, _target in inherit_modules:
    _inherit_docstrings(_doc_source, overwrite_existing=True)(_target)


# Don't warn the user about our internal usage of private preview pivot
# features. The user should have already been warned that Snowpark pandas
Expand Down
Loading

0 comments on commit ab837ef

Please sign in to comment.