diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6f701cb3e02..d0817371d93 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -34,6 +34,7 @@
 - Fixed a bug in query generation from set operations that allowed generation of duplicate queries when children have common subqueries.
 - Fixed a bug in `session.get_session_stage` that referenced a non-existing stage after switching database or schema.
 - Fixed a bug where calling `DataFrame.to_snowpark_pandas_dataframe` without explicitly initializing the Snowpark pandas plugin caused an error.
+- Fixed a bug where using the `explode` function in dynamic table creation caused a SQL compilation error due to improper boolean type casting on the `outer` parameter.

 ### Snowpark Local Testing Updates

@@ -61,6 +62,7 @@
   - support for binary arithmetic between two `Timedelta` values.
   - support for lazy `TimedeltaIndex`.
   - support for `pd.to_timedelta`.
+  - support for `GroupBy` aggregations `min`, `max`, `mean`, `idxmax`, `idxmin`, `std`, `sum`, `median`, `count`, `any`, `all`, `size`, `nunique`.
 - Added support for index's arithmetic and comparison operators.
 - Added support for `Series.dt.round`.
 - Added documentation pages for `DatetimeIndex`.
@@ -78,12 +80,19 @@
 #### Improvements

 - Refactored `quoted_identifier_to_snowflake_type` to avoid making metadata queries if the types have been cached locally.
+- Improved `pd.to_datetime` to handle all local input cases.
+- Improved `concat` and `join` performance when operations are performed on `Series` objects coming from the same `DataFrame` by avoiding unnecessary joins.

 #### Bug Fixes

 - Stopped ignoring nanoseconds in `pd.Timedelta` scalars.
 - Fixed AssertionError in tree of binary operations.
+
+#### Behavior Change
+
+- When calling `DataFrame.set_index`, or setting `DataFrame.index` or `Series.index`, with a new index that does not match the current length of the `Series`/`DataFrame` object, a `ValueError` is no longer raised. When the `Series`/`DataFrame` object is longer than the new index, the `Series`/`DataFrame`'s new index is filled with `NaN` values for the "extra" elements. When the `Series`/`DataFrame` object is shorter than the new index, the extra values in the new index are ignored; `Series` and `DataFrame` stay the same length `n`, and use only the first `n` values of the new index.
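To make the Behavior Change entry above concrete, here is a minimal standalone sketch of the new length-mismatch rule. `conform_index` is a hypothetical helper used only for illustration, not part of the Snowpark pandas API:

```python
import math

def conform_index(obj_len: int, new_index: list) -> list:
    """Illustrates the rule above: pad a too-short index with NaN and
    ignore the tail of a too-long one, instead of raising ValueError."""
    if len(new_index) < obj_len:
        # object is longer than the new index: fill the tail with NaN
        return list(new_index) + [math.nan] * (obj_len - len(new_index))
    # object is shorter than (or equal to) the new index: keep the first n labels
    return list(new_index)[:obj_len]

print(conform_index(4, ["a", "b"]))       # ['a', 'b', nan, nan]
print(conform_index(2, ["a", "b", "c"]))  # ['a', 'b']
```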
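The `explode` fix in the first hunk above lands in `to_sql_try_avoid_cast` later in this diff: a Python `bool` literal is now rendered as a bare SQL `TRUE`/`FALSE` instead of a casted expression, which avoids the compilation error on `FLATTEN`'s `outer` argument during dynamic table creation. A simplified sketch of that rendering rule (`literal_to_sql` is a hypothetical stand-in, not the Snowpark function):

```python
def literal_to_sql(value) -> str:
    # Check bool before the numeric branch: bool is a subclass of int.
    if isinstance(value, bool):
        return str(value).upper()  # True -> "TRUE", False -> "FALSE", no cast
    if isinstance(value, (int, float)):
        return repr(value)  # numeric literals are also emitted without a cast
    raise TypeError(f"unsupported literal: {value!r}")

assert literal_to_sql(True) == "TRUE"
assert literal_to_sql(512) == "512"
```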
+ ## 1.21.0 (2024-08-19) ### Snowpark Python API Updates diff --git a/src/snowflake/snowpark/_internal/analyzer/analyzer.py b/src/snowflake/snowpark/_internal/analyzer/analyzer.py index 0c6c64554de..6567754d229 100644 --- a/src/snowflake/snowpark/_internal/analyzer/analyzer.py +++ b/src/snowflake/snowpark/_internal/analyzer/analyzer.py @@ -151,7 +151,7 @@ from snowflake.snowpark._internal.error_message import SnowparkClientExceptionMessages from snowflake.snowpark._internal.telemetry import TelemetryField from snowflake.snowpark._internal.utils import quote_name -from snowflake.snowpark.types import _NumericType +from snowflake.snowpark.types import BooleanType, _NumericType ARRAY_BIND_THRESHOLD = 512 @@ -605,7 +605,7 @@ def table_function_expression_extractor( sql = named_arguments_function( expr.func_name, { - key: self.analyze( + key: self.to_sql_try_avoid_cast( value, df_aliased_col_name_to_real_col_name, parse_local_name ) for key, value in expr.args.items() @@ -745,6 +745,12 @@ def to_sql_try_avoid_cast( # otherwise process as normal if isinstance(expr, Literal) and isinstance(expr.datatype, _NumericType): return numeric_to_sql_without_cast(expr.value, expr.datatype) + elif ( + isinstance(expr, Literal) + and isinstance(expr.datatype, BooleanType) + and isinstance(expr.value, bool) + ): + return str(expr.value).upper() else: return self.analyze( expr, df_aliased_col_name_to_real_col_name, parse_local_name diff --git a/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py b/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py index 7fd351b16dc..e22355674ee 100644 --- a/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py +++ b/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py @@ -856,6 +856,9 @@ def create_table_as_select_statement( max_data_extension_time: Optional[int] = None, change_tracking: Optional[bool] = None, copy_grants: bool = False, + *, + use_scoped_temp_objects: bool = False, + is_generated: bool = False, ) -> str: column_definition_sql = ( f"{LEFT_PARENTHESIS}{column_definition}{RIGHT_PARENTHESIS}" @@ -877,8 +880,9 @@ def create_table_as_select_statement( } ) return ( - f"{CREATE}{OR + REPLACE if replace else EMPTY_STRING} {table_type.upper()} {TABLE}" - f"{IF + NOT + EXISTS if not replace and not error else EMPTY_STRING} " + f"{CREATE}{OR + REPLACE if replace else EMPTY_STRING}" + f" {(get_temp_type_for_object(use_scoped_temp_objects, is_generated) if table_type.lower() in TEMPORARY_STRING_SET else table_type).upper()} " + f"{TABLE}{IF + NOT + EXISTS if not replace and not error else EMPTY_STRING} " f"{table_name}{column_definition_sql}{cluster_by_clause}{options_statement}" f"{COPY_GRANTS if copy_grants else EMPTY_STRING}{comment_sql} {AS}{project_statement([], child)}" ) diff --git a/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py b/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py index aad369a8b83..559cbeb3cc5 100644 --- a/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py +++ b/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py @@ -929,6 +929,8 @@ def get_create_table_as_select_plan(child: SnowflakePlan, replace, error): max_data_extension_time=max_data_extension_time, change_tracking=change_tracking, copy_grants=copy_grants, + use_scoped_temp_objects=use_scoped_temp_objects, + is_generated=is_generated, ), child, source_plan, diff --git a/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py b/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py index 
34d27862ced..5707d71dc33 100644 --- a/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py +++ b/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py @@ -6,8 +6,6 @@ from collections import defaultdict from typing import List, Optional, Tuple -from sortedcontainers import SortedList - from snowflake.snowpark._internal.analyzer.analyzer_utils import ( drop_table_if_exists_statement, ) @@ -201,11 +199,11 @@ def _find_node_to_breakdown(self, root: TreeNode) -> Optional[TreeNode]: 1. Traverse the plan tree and find the valid nodes for partitioning. 2. If no valid node is found, return None. - 3. Keep valid nodes in a sorted list based on the complexity score. - 4. Return the node with the highest complexity score. + 3. Return the node with the highest complexity score. """ current_level = [root] - pipeline_breaker_list = SortedList(key=lambda x: x[0]) + candidate_node = None + candidate_score = -1 # start with -1 since score is always > 0 while current_level: next_level = [] @@ -215,23 +213,20 @@ def _find_node_to_breakdown(self, root: TreeNode) -> Optional[TreeNode]: self._parent_map[child].add(node) valid_to_breakdown, score = self._is_node_valid_to_breakdown(child) if valid_to_breakdown: - # Append score and child to the pipeline breaker sorted list - # so that the valid child with the highest complexity score - # is at the end of the list. - pipeline_breaker_list.add((score, child)) + # If the score for valid node is higher than the last candidate, + # update the candidate node and score. + if score > candidate_score: + candidate_score = score + candidate_node = child else: # don't traverse subtrees if parent is a valid candidate next_level.append(child) current_level = next_level - if not pipeline_breaker_list: - # Return None if no valid node is found for partitioning. - return None - - # Get the node with the highest complexity score - _, child = pipeline_breaker_list.pop() - return child + # If no valid node is found, candidate_node will be None. + # Otherwise, return the node with the highest complexity score. 
+ return candidate_node def _get_partitioned_plan(self, root: TreeNode, child: TreeNode) -> SnowflakePlan: """This method takes cuts the child out from the root, creates a temp table plan for the diff --git a/src/snowflake/snowpark/_internal/compiler/utils.py b/src/snowflake/snowpark/_internal/compiler/utils.py index 30f1cbb2cfd..82c2b090487 100644 --- a/src/snowflake/snowpark/_internal/compiler/utils.py +++ b/src/snowflake/snowpark/_internal/compiler/utils.py @@ -268,15 +268,6 @@ def update_resolvable_node( elif isinstance(node, (SelectSnowflakePlan, SelectTableFunction)): assert node.snowflake_plan is not None update_resolvable_node(node.snowflake_plan, query_generator) - node.pre_actions = node.snowflake_plan.queries[:-1] - node.post_actions = node.snowflake_plan.post_actions - node._api_calls = node.snowflake_plan.api_calls - if isinstance(node, SelectSnowflakePlan): - node._query_params = [] - for query in node._snowflake_plan.queries: - if query.params: - node._query_params.extend(query.params) - node.analyzer = query_generator node.pre_actions = node._snowflake_plan.queries[:-1] diff --git a/src/snowflake/snowpark/modin/pandas/__init__.py b/src/snowflake/snowpark/modin/pandas/__init__.py index c4eb07d9589..b51a47b64b3 100644 --- a/src/snowflake/snowpark/modin/pandas/__init__.py +++ b/src/snowflake/snowpark/modin/pandas/__init__.py @@ -85,10 +85,16 @@ timedelta_range, ) +import modin.pandas + # TODO: SNOW-851745 make sure add all Snowpark pandas API general functions from modin.pandas import plotting # type: ignore[import] -from snowflake.snowpark.modin.pandas.dataframe import DataFrame +from snowflake.snowpark.modin.pandas.api.extensions import ( + register_dataframe_accessor, + register_series_accessor, +) +from snowflake.snowpark.modin.pandas.dataframe import _DATAFRAME_EXTENSIONS_, DataFrame from snowflake.snowpark.modin.pandas.general import ( concat, crosstab, @@ -140,15 +146,15 @@ read_xml, to_pickle, ) -from snowflake.snowpark.modin.pandas.series import Series +from snowflake.snowpark.modin.pandas.series import _SERIES_EXTENSIONS_, Series from snowflake.snowpark.modin.plugin._internal.session import SnowpandasSessionHolder +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + try_add_telemetry_to_attribute, +) # The extensions assigned to this module _PD_EXTENSIONS_: dict = {} -# base needs to be re-exported in order to properly override docstrings for BasePandasDataset -# moving this import higher prevents sphinx from building documentation (??) 
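The `_find_node_to_breakdown` rewrite above drops the `sortedcontainers` dependency: since only the highest-scoring node is ever used, a running maximum suffices. A self-contained sketch of the same traversal, where `Node`, `valid`, and `score` are stand-ins for `TreeNode` and `_is_node_valid_to_breakdown`:

```python
class Node:
    def __init__(self, score, valid, children=()):
        self.score, self.valid = score, valid
        self.children_plan_nodes = list(children)

def find_node_to_breakdown(root):
    candidate, candidate_score = None, -1  # scores are assumed non-negative
    level = [root]
    while level:
        next_level = []
        for node in level:
            for child in node.children_plan_nodes:
                if child.valid:
                    # track only the best candidate instead of a SortedList
                    if child.score > candidate_score:
                        candidate_score, candidate = child.score, child
                else:
                    # don't traverse subtrees if the parent is a valid candidate
                    next_level.append(child)
        level = next_level
    return candidate  # None if no valid node was found

root = Node(0, False, [Node(5, True), Node(9, False, [Node(7, True)])])
assert find_node_to_breakdown(root).score == 7
```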
-from snowflake.snowpark.modin.pandas import base # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.pd_extensions as pd_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.pd_overrides # isort: skip # noqa: E402,F401 @@ -157,12 +163,71 @@ DatetimeIndex, TimedeltaIndex, ) + +# this must occur before overrides are applied +_attrs_defined_on_modin_base = set(dir(modin.pandas.base.BasePandasDataset)) +_attrs_defined_on_series = set( + dir(Series) +) # TODO: SNOW-1063347 revisit when series.py is removed +_attrs_defined_on_dataframe = set( + dir(DataFrame) +) # TODO: SNOW-1063346 revisit when dataframe.py is removed + +# base overrides occur before subclass overrides in case subclasses override a base method +import snowflake.snowpark.modin.plugin.extensions.base_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.base_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.series_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.series_overrides # isort: skip # noqa: E402,F401 +# For any method defined on Series/DF, add telemetry to it if it meets all of the following conditions: +# 1. The method was defined directly on upstream BasePandasDataset (_attrs_defined_on_modin_base) +# 2. The method is not overridden by a child class (this will change) +# 3. The method is not overridden by an extensions module +# 4. The method name does not start with an _ +# +# TODO: SNOW-1063347 +# Since we still use the vendored version of Series and the overrides for the top-level +# namespace haven't been performed yet, we need to set properties on the vendored version +_base_telemetry_added_attrs = set() + +_series_ext = _SERIES_EXTENSIONS_.copy() +for attr_name in dir(Series): + if ( + attr_name in _attrs_defined_on_modin_base + and attr_name in _attrs_defined_on_series + and attr_name not in _series_ext + and not attr_name.startswith("_") + ): + register_series_accessor(attr_name)( + try_add_telemetry_to_attribute(attr_name, getattr(Series, attr_name)) + ) + _base_telemetry_added_attrs.add(attr_name) + +# TODO: SNOW-1063346 +# Since we still use the vendored version of DataFrame and the overrides for the top-level +# namespace haven't been performed yet, we need to set properties on the vendored version +_dataframe_ext = _DATAFRAME_EXTENSIONS_.copy() +for attr_name in dir(DataFrame): + if ( + attr_name in _attrs_defined_on_modin_base + and attr_name in _attrs_defined_on_dataframe + and attr_name not in _dataframe_ext + and not attr_name.startswith("_") + ): + # If telemetry was already added via Series, register the override but don't re-wrap + # the method in the telemetry annotation. If we don't do this check, we will end up + # double-reporting telemetry on some methods. 
+ original_attr = getattr(DataFrame, attr_name) + new_attr = ( + original_attr + if attr_name in _base_telemetry_added_attrs + else try_add_telemetry_to_attribute(attr_name, original_attr) + ) + register_dataframe_accessor(attr_name)(new_attr) + _base_telemetry_added_attrs.add(attr_name) + def __getattr__(name: str) -> Any: """ @@ -220,7 +285,6 @@ def __getattr__(name: str) -> Any: "date_range", "Index", "MultiIndex", - "Series", "bdate_range", "period_range", "DatetimeIndex", @@ -318,8 +382,7 @@ def __getattr__(name: str) -> Any: # Manually re-export the members of the pd_extensions namespace, which are not declared in __all__. _EXTENSION_ATTRS = ["read_snowflake", "to_snowflake", "to_snowpark", "to_pandas"] # We also need to re-export native_pd.offsets, since modin.pandas doesn't re-export it. -# snowflake.snowpark.pandas.base also needs to be re-exported to make docstring overrides for BasePandasDataset work. -_ADDITIONAL_ATTRS = ["offsets", "base"] +_ADDITIONAL_ATTRS = ["offsets"] # This code should eventually be moved into the `snowflake.snowpark.modin.plugin` module instead. # Currently, trying to do so would result in incorrect results because `snowflake.snowpark.modin.pandas` diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py index c08cdee1386..26071049237 100644 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ b/src/snowflake/snowpark/modin/pandas/base.py @@ -604,14 +604,6 @@ def _to_series_list(self, index: pd.Index) -> list[pd.Series]: return [pd.Series(index)] def _set_index(self, new_index: Axes) -> None: - """ - Set the index for this DataFrame. - - Parameters - ---------- - new_index : pandas.Index - The new index to set this. - """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset self._update_inplace( new_query_compiler=self._query_compiler.set_index( @@ -655,14 +647,6 @@ def set_axis( return obj def _get_index(self): - """ - Get the index for this DataFrame. - - Returns - ------- - pandas.Index - The union of all indexes across the partitions. - """ # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset from snowflake.snowpark.modin.plugin.extensions.index import Index diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index a7d53813779..b42ad5a04c7 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -37,6 +37,7 @@ import numpy as np import pandas from modin.pandas.accessor import CachedAccessor, SparseFrameAccessor +from modin.pandas.base import BasePandasDataset # from . 
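The registration loops above guard against double-reporting with a shared `_base_telemetry_added_attrs` set. A minimal sketch of that pattern, using a plain `setattr` in place of `register_series_accessor`/`register_dataframe_accessor` and omitting the modin-base-class checks:

```python
def add_telemetry_once(cls, wrap, already_wrapped):
    # Wrap each public callable exactly once; the shared set prevents a
    # method wrapped for one class from being re-wrapped for another.
    for name in dir(cls):
        if name.startswith("_") or name in already_wrapped:
            continue
        attr = getattr(cls, name)
        if callable(attr):
            setattr(cls, name, wrap(attr))
            already_wrapped.add(name)

events = []
def wrap(func):
    def inner(*args, **kwargs):
        events.append(func.__name__)  # stand-in for a telemetry event
        return func(*args, **kwargs)
    return inner

class A:
    def ping(self):
        return "pong"

seen = set()
add_telemetry_once(A, wrap, seen)
add_telemetry_once(A, wrap, seen)  # second pass is a no-op
assert A().ping() == "pong" and events == ["ping"]
```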
import _update_engine from modin.pandas.iterator import PartitionIterator @@ -73,7 +74,6 @@ from pandas.util._validators import validate_bool_kwarg from snowflake.snowpark.modin import pandas as pd -from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset from snowflake.snowpark.modin.pandas.groupby import ( DataFrameGroupBy, validate_groupby_args, @@ -91,12 +91,14 @@ replace_external_data_keys_with_empty_pandas_series, replace_external_data_keys_with_query_compiler, ) +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike from snowflake.snowpark.modin.plugin.utils.error_message import ( ErrorMessage, dataframe_not_implemented, ) +from snowflake.snowpark.modin.plugin.utils.frontend_constants import _ATTRS_NO_LOOKUP from snowflake.snowpark.modin.plugin.utils.warning_message import ( SET_DATAFRAME_ATTRIBUTE_WARNING, WarningMessage, @@ -136,7 +138,7 @@ ], apilink="pandas.DataFrame", ) -class DataFrame(BasePandasDataset): +class DataFrame(BasePandasDataset, metaclass=TelemetryMeta): _pandas_class = pandas.DataFrame def __init__( diff --git a/src/snowflake/snowpark/modin/pandas/general.py b/src/snowflake/snowpark/modin/pandas/general.py index 8d933cd6a11..07f0617d612 100644 --- a/src/snowflake/snowpark/modin/pandas/general.py +++ b/src/snowflake/snowpark/modin/pandas/general.py @@ -30,6 +30,7 @@ import numpy as np import pandas import pandas.core.common as common +from modin.pandas.base import BasePandasDataset from pandas import IntervalIndex, NaT, Timedelta, Timestamp from pandas._libs import NaTType, lib from pandas._libs.tslibs import to_offset @@ -61,7 +62,6 @@ # add this line to make doctests runnable from snowflake.snowpark.modin import pandas as pd # noqa: F401 -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.dataframe import DataFrame from snowflake.snowpark.modin.pandas.series import Series from snowflake.snowpark.modin.pandas.utils import ( @@ -1742,16 +1742,13 @@ def to_datetime( The default behaviour (``utc=False``) is as follows: - - Timezone-naive inputs are converted to timezone-naive :class:`~snowflake.snowpark.modin.pandas.Series`: + - Timezone-naive inputs are kept as timezone-naive :class:`~snowflake.snowpark.modin.pandas.DatetimeIndex`: - >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15']) + >>> pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15']) DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], dtype='datetime64[ns]', freq=None) - - Timezone-aware inputs *with constant time offset* are still converted to - timezone-naive :class:`~snowflake.snowpark.modin.pandas.Series` by default. 
- >>> pd.to_datetime(['2018-10-26 12:00:00 -0500', '2018-10-26 13:00:00 -0500']) - DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:00'], dtype='datetime64[ns]', freq=None) + DatetimeIndex(['2018-10-26 10:00:00-07:00', '2018-10-26 11:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) - Use right format to convert to timezone-aware type (Note that when call Snowpark pandas API to_pandas() the timezone-aware output will always be converted to session timezone): @@ -1763,17 +1760,17 @@ def to_datetime( issued from a timezone with daylight savings, such as Europe/Paris): >>> pd.to_datetime(['2020-10-25 02:00:00 +0200', '2020-10-25 04:00:00 +0100']) - DatetimeIndex(['2020-10-25 02:00:00', '2020-10-25 04:00:00'], dtype='datetime64[ns]', freq=None) + Index(['2020-10-24 17:00:00-07:00', '2020-10-24 20:00:00-07:00'], dtype='datetime64[ns]') >>> pd.to_datetime(['2020-10-25 02:00:00 +0200', '2020-10-25 04:00:00 +0100'], format="%Y-%m-%d %H:%M:%S %z") - DatetimeIndex(['2020-10-24 17:00:00-07:00', '2020-10-24 20:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) + Index(['2020-10-24 17:00:00-07:00', '2020-10-24 20:00:00-07:00'], dtype='datetime64[ns]') Setting ``utc=True`` makes sure always convert to timezone-aware outputs: - Timezone-naive inputs are *localized* based on the session timezone >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) - DatetimeIndex(['2018-10-26 12:00:00-07:00', '2018-10-26 13:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) + DatetimeIndex(['2018-10-26 05:00:00-07:00', '2018-10-26 06:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) - Timezone-aware inputs are *converted* to session timezone @@ -1784,8 +1781,28 @@ def to_datetime( # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py raise_if_native_pandas_objects(arg) - if arg is None: - return None # same as pandas + if not isinstance(arg, (DataFrame, Series, pd.Index)): + # use pandas.to_datetime to convert local data to datetime + res = pandas.to_datetime( + arg, + errors, + dayfirst, + yearfirst, + utc, + format, + exact, + unit, + infer_datetime_format, + origin, + cache, + ) + if isinstance(res, pandas.Series): + res = pd.Series(res) + elif not is_scalar(res): + res = pd.Index(res) + return res + + # handle modin objs if unit and unit not in VALID_TO_DATETIME_UNIT: raise ValueError(f"Unrecognized unit {unit}") @@ -1795,15 +1812,8 @@ def to_datetime( argument="cache", message="cache parameter is ignored with Snowflake backend, i.e., no caching will be applied", ) - arg_is_scalar = is_scalar(arg) - - if not isinstance(arg, (DataFrame, Series, pd.Index)): - # Turn dictionary like arg into pd.DataFrame and list-like or scalar to - # pd.Index. - arg = [arg] if arg_is_scalar else arg - arg = DataFrame(arg) if isinstance(arg, dict) else pd.Index(arg) - series_or_index = arg._to_datetime( + return arg._to_datetime( errors=errors, dayfirst=dayfirst, yearfirst=yearfirst, @@ -1814,13 +1824,6 @@ def to_datetime( infer_datetime_format=infer_datetime_format, origin=origin, ) - if arg_is_scalar: - # Calling squeeze directly on Snowpark pandas Series makes an unnecessary - # count sql call. To avoid that we convert Snowpark pandas Series to Native - # pandas series first. - # Note: When arg_is_scalar is True 'series_or_index' is always an Index. 
- return series_or_index.to_series().to_pandas().squeeze() - return series_or_index @snowpark_pandas_telemetry_standalone_function_decorator diff --git a/src/snowflake/snowpark/modin/pandas/indexing.py b/src/snowflake/snowpark/modin/pandas/indexing.py index 0ac62f504ce..c83e3fe41c4 100644 --- a/src/snowflake/snowpark/modin/pandas/indexing.py +++ b/src/snowflake/snowpark/modin/pandas/indexing.py @@ -43,6 +43,7 @@ import numpy as np import pandas +from modin.pandas.base import BasePandasDataset from pandas._libs.tslibs import Resolution, parsing from pandas._typing import AnyArrayLike, Scalar from pandas.api.types import is_bool, is_list_like @@ -58,7 +59,6 @@ import snowflake.snowpark.modin.pandas as pd import snowflake.snowpark.modin.pandas.utils as frontend_utils -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.dataframe import DataFrame from snowflake.snowpark.modin.pandas.series import ( SERIES_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE_ERROR_MESSAGE, diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index 1ce3ecfc997..6e1b93437a8 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -31,6 +31,7 @@ import numpy.typing as npt import pandas from modin.pandas.accessor import CachedAccessor, SparseAccessor +from modin.pandas.base import BasePandasDataset from modin.pandas.iterator import PartitionIterator from pandas._libs.lib import NoDefault, is_integer, no_default from pandas._typing import ( @@ -51,17 +52,18 @@ from pandas.core.series import _coerce_method from pandas.util._validators import validate_bool_kwarg -from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset from snowflake.snowpark.modin.pandas.utils import ( from_pandas, is_scalar, try_convert_index_to_native, ) +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike from snowflake.snowpark.modin.plugin.utils.error_message import ( ErrorMessage, series_not_implemented, ) +from snowflake.snowpark.modin.plugin.utils.frontend_constants import _ATTRS_NO_LOOKUP from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from snowflake.snowpark.modin.utils import ( MODIN_UNNAMED_SERIES_LABEL, @@ -108,7 +110,7 @@ ], apilink="pandas.Series", ) -class Series(BasePandasDataset): +class Series(BasePandasDataset, metaclass=TelemetryMeta): _pandas_class = pandas.Series __array_priority__ = pandas.Series.__array_priority__ diff --git a/src/snowflake/snowpark/modin/pandas/utils.py b/src/snowflake/snowpark/modin/pandas/utils.py index f971e0ff964..32702c8b1a4 100644 --- a/src/snowflake/snowpark/modin/pandas/utils.py +++ b/src/snowflake/snowpark/modin/pandas/utils.py @@ -170,10 +170,9 @@ def is_scalar(obj): bool True if given object is scalar and False otherwise. 
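The `to_datetime` change above introduces an eager fast path: anything that is not already a Snowpark pandas object is converted with native pandas first, so scalars and local lists no longer trigger Snowflake queries. A rough sketch of the dispatch, where `is_lazy` stands in for the `isinstance(arg, (DataFrame, Series, pd.Index))` check and the re-wrapping of results into Snowpark pandas objects is omitted:

```python
import pandas as native_pd

def to_datetime_sketch(arg, is_lazy=lambda a: False, **kwargs):
    if not is_lazy(arg):
        # local input: convert eagerly, no Snowflake query needed
        return native_pd.to_datetime(arg, **kwargs)
    # lazy input: defer to the Snowflake engine (arg._to_datetime(...))
    raise NotImplementedError

print(to_datetime_sketch(["2018-10-26 12:00", "2018-10-26 13:00:15"]))
# DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], dtype='datetime64[ns]', freq=None)
```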
""" + from modin.pandas.base import BasePandasDataset from pandas.api.types import is_scalar as pandas_is_scalar - from .base import BasePandasDataset - return not isinstance(obj, BasePandasDataset) and pandas_is_scalar(obj) diff --git a/src/snowflake/snowpark/modin/plugin/__init__.py b/src/snowflake/snowpark/modin/plugin/__init__.py index a76b9fe1613..c4172f26696 100644 --- a/src/snowflake/snowpark/modin/plugin/__init__.py +++ b/src/snowflake/snowpark/modin/plugin/__init__.py @@ -63,15 +63,23 @@ import modin.utils # type: ignore[import] # isort: skip # noqa: E402 import modin.pandas.series_utils # type: ignore[import] # isort: skip # noqa: E402 -modin.utils._inherit_docstrings( - docstrings.series_utils.StringMethods, - overwrite_existing=True, -)(modin.pandas.series_utils.StringMethods) - -modin.utils._inherit_docstrings( - docstrings.series_utils.CombinedDatetimelikeProperties, - overwrite_existing=True, -)(modin.pandas.series_utils.DatetimeProperties) +# TODO: SNOW-1643979 pull in fixes for +# https://github.com/modin-project/modin/issues/7113 and https://github.com/modin-project/modin/issues/7134 +# Upstream Modin has issues with certain docstring generation edge cases, so we should use our version instead +_inherit_docstrings = snowflake.snowpark.modin.utils._inherit_docstrings + +inherit_modules = [ + (docstrings.base.BasePandasDataset, modin.pandas.base.BasePandasDataset), + (docstrings.series_utils.StringMethods, modin.pandas.series_utils.StringMethods), + ( + docstrings.series_utils.CombinedDatetimelikeProperties, + modin.pandas.series_utils.DatetimeProperties, + ), +] + +for (doc_module, target_object) in inherit_modules: + _inherit_docstrings(doc_module, overwrite_existing=True)(target_object) + # Don't warn the user about our internal usage of private preview pivot # features. The user should have already been warned that Snowpark pandas diff --git a/src/snowflake/snowpark/modin/plugin/_internal/frame.py b/src/snowflake/snowpark/modin/plugin/_internal/frame.py index b30f244bf16..c4ed377c05c 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/frame.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/frame.py @@ -1295,26 +1295,30 @@ def update_snowflake_quoted_identifiers_with_expressions( existing_id_to_new_id_mapping, ) - def apply_snowpark_function_to_data_columns( - self, snowpark_func: Callable[[Any], SnowparkColumn] + def apply_snowpark_function_to_columns( + self, + snowpark_func: Callable[[Any], SnowparkColumn], + include_index: bool = False, ) -> "InternalFrame": """ - Apply snowpark function callable to data columns of an InternalFrame. The snowflake quoted identifiers - are preserved. + Apply snowpark function callable to all data columns of an InternalFrame. If + include_index is True also apply this function to all index columns. The + snowflake quoted identifiers are preserved. Arguments: - snowpark_func: Snowpark function to apply to data columns of underlying snowpark df. + snowpark_func: Snowpark function to apply to columns of underlying snowpark df. + include_index: Whether to apply the function to index columns as well. Returns: - InternalFrame with snowpark_func applies to data columns of original frame, all other columns remain unchanged. + InternalFrame with snowpark_func applies to columns of original frame, all other columns remain unchanged. 
""" - new_internal_frame = self.update_snowflake_quoted_identifiers_with_expressions( - { - snowflake_quoted_identifier: snowpark_func(snowflake_quoted_identifier) - for snowflake_quoted_identifier in self.data_column_snowflake_quoted_identifiers - } + snowflake_ids = self.data_column_snowflake_quoted_identifiers + if include_index: + snowflake_ids.extend(self.index_column_snowflake_quoted_identifiers) + + return self.update_snowflake_quoted_identifiers_with_expressions( + {col_id: snowpark_func(col(col_id)) for col_id in snowflake_ids} ).frame - return new_internal_frame def select_active_columns(self) -> "InternalFrame": """ diff --git a/src/snowflake/snowpark/modin/plugin/_internal/groupby_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/groupby_utils.py index 09572a16d87..80e01c4ad32 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/groupby_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/groupby_utils.py @@ -41,6 +41,25 @@ ] NO_GROUPKEY_ERROR = ValueError("No group keys passed!") +GROUPBY_AGG_PRESERVES_SNOWPARK_PANDAS_TYPE = [ + "min", + "max", + "sum", + "mean", + "median", + "std", + "first", + "last", +] +GROUPBY_AGG_WITH_NONE_SNOWPARK_PANDAS_TYPES = [ + "any", + "all", + "count", + "idxmax", + "idxmin", + "size", + "nunique", +] def is_groupby_value_label_like(val: Any) -> bool: diff --git a/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py index 846f3c64079..457bd388f2b 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py @@ -320,12 +320,26 @@ def _create_internal_frame_with_join_or_align_result( ) index_column_types.extend(right.cached_index_column_snowpark_pandas_types) + # If the result ordering column has the same ordering columns as the original left ordering columns, + # that means the original left and right shares the same base, and no actual snowpark join is applied because + # the join is applied on the ordering column or align on the same column. + # This behavior is guaranteed by the align and join methods provided by the OrderingDataframe, when the + # snowpark join is actually applied, the result ordering column will be a combination of + # left.ordering_column and right.ordering_column, plus some assist column. For example, the ordering column + # of left join is left.ordering_column + right.ordering_column. + no_join_applied = ( + result_ordered_frame.ordering_columns == left.ordered_dataframe.ordering_columns + ) + if key_coalesce_config: coalesce_column_identifiers = [] coalesce_column_values = [] for origin_left_col, origin_right_col, coalesce_config in zip( left_on, right_on, key_coalesce_config ): + if coalesce_config == JoinKeyCoalesceConfig.NONE: + continue + coalesce_col_type = None origin_left_col_type = ( left.snowflake_quoted_identifier_to_snowpark_pandas_type[ @@ -337,44 +351,60 @@ def _create_internal_frame_with_join_or_align_result( origin_right_col ] ) - if coalesce_config == JoinKeyCoalesceConfig.NONE: - continue + left_col = result_helper.map_left_quoted_identifiers([origin_left_col])[0] right_col = result_helper.map_right_quoted_identifiers([origin_right_col])[ 0 ] - # Coalescing is only required for 'outer' or 'asof' joins or align. - # For 'inner' and 'left' join we use left join keys and for 'right' join we - # use right join keys. - # For 'left' and 'coalesce' align we use left join keys. 
- if how in ("asof", "outer"): - # Generate an expression equivalent of - # "COALESCE('left_col', 'right_col') as 'left_col'" - coalesce_column_identifier = ( - result_ordered_frame.generate_snowflake_quoted_identifiers( - pandas_labels=[ - extract_pandas_label_from_snowflake_quoted_identifier( - left_col - ) - ], - )[0] - ) - coalesce_column_identifiers.append(coalesce_column_identifier) - coalesce_column_values.append(coalesce(left_col, right_col)) - if origin_left_col_type == origin_right_col_type: - coalesce_col_type = origin_left_col_type - elif how == "right": - # No coalescing required for 'right' join. Simply use right join key - # as output column. - coalesce_column_identifier = right_col - coalesce_col_type = origin_right_col_type - elif how in ("inner", "left", "coalesce"): - # No coalescing required for 'left' or 'inner' join and for 'left' or - # 'coalesce' align. Simply use left join key as output column. + + if no_join_applied and origin_left_col == origin_right_col: + # if no join is applied, that means the result dataframe, left dataframe and right dataframe + # shares the same base dataframe. If the original left column and original right column are the + # same column, no coalesce is needed, and we always tries to keep the left column to stay align + # with the original dataframe as much as possible to increase the chance for optimization for + # later operations, especially when the later operations are applied with dfs coming from + # the ame dataframe. + # Keep left column can help stay aligned with the original dataframe is because when there are + # conflict between left and right, deduplication always happens at right. For example, when join + # or align left dataframe [col1, col2] and right dataframe [col1, col2], the result dataframe will + # have columns [col1, col2, col1_a12b, col2_de3b], where col1_a12b, col2_de3b are just alias of + # col1 and col2 in right dataframe. + coalesce_config = JoinKeyCoalesceConfig.LEFT coalesce_column_identifier = left_col coalesce_col_type = origin_left_col_type else: - raise AssertionError(f"Unsupported join/align type {how}") + # Coalescing is only required for 'outer' or 'asof' joins or align. + # For 'inner' and 'left' join we use left join keys and for 'right' join we + # use right join keys. + # For 'left' and 'coalesce' align we use left join keys. + if how in ("asof", "outer"): + # Generate an expression equivalent of + # "COALESCE('left_col', 'right_col') as 'left_col'" + coalesce_column_identifier = ( + result_ordered_frame.generate_snowflake_quoted_identifiers( + pandas_labels=[ + extract_pandas_label_from_snowflake_quoted_identifier( + left_col + ) + ], + )[0] + ) + coalesce_column_identifiers.append(coalesce_column_identifier) + coalesce_column_values.append(coalesce(left_col, right_col)) + if origin_left_col_type == origin_right_col_type: + coalesce_col_type = origin_left_col_type + elif how == "right": + # No coalescing required for 'right' join. Simply use right join key + # as output column. + coalesce_column_identifier = right_col + coalesce_col_type = origin_right_col_type + elif how in ("inner", "left", "coalesce"): + # No coalescing required for 'left' or 'inner' join and for 'left' or + # 'coalesce' align. Simply use left join key as output column. 
+                    coalesce_column_identifier = left_col
+                    coalesce_col_type = origin_left_col_type
+                else:
+                    raise AssertionError(f"Unsupported join/align type {how}")

             if coalesce_config == JoinKeyCoalesceConfig.RIGHT:
                 # swap left_col and right_col
@@ -1187,15 +1217,8 @@ def align(
     #   NULL NULL 2    NULL 4    e     2
     coalesce_key_config = None
     inherit_join_index = InheritJoinIndex.FROM_LEFT
-    # When it is `outer` align, we need to coalesce the align columns. However, if the
-    # ordering columns of aligned result is the same as the left frame, that means the
-    # join columns of left and right matches, then there is no need to coalesce the join
-    # keys, simply inherent from left gives the correct result.
-    # Retaining the original columns also helps avoid unnecessary join in later steps.
-    if (
-        how == "outer"
-        and aligned_ordered_frame.ordering_columns != left.ordering_columns
-    ):
+    # When it is `outer` align, we need to coalesce the align columns.
+    if how == "outer":
         coalesce_key_config = [JoinKeyCoalesceConfig.LEFT] * len(left_on)
         inherit_join_index = InheritJoinIndex.FROM_BOTH
     (
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py b/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py
index 0a022b0d588..8057cf93885 100644
--- a/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py
+++ b/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py
@@ -495,6 +495,49 @@ def wrap(*args, **kwargs):  # type: ignore
     }

+def try_add_telemetry_to_attribute(attr_name: str, attr_value: Any) -> Any:
+    """
+    Attempts to add telemetry to an attribute.
+
+    If the attribute is a callable whose name starts with an underscore and is not in
+    TELEMETRY_PRIVATE_METHODS, or is neither callable nor a property, the original
+    attribute is returned as-is. Otherwise, a version of the method/property annotated
+    with Snowpark pandas telemetry is returned.
+    """
+    if callable(attr_value) and (
+        not attr_name.startswith("_") or (attr_name in TELEMETRY_PRIVATE_METHODS)
+    ):
+        return snowpark_pandas_telemetry_method_decorator(attr_value)
+    elif isinstance(attr_value, property):
+        # wrap the getter, setter and deleter
+        return property(
+            snowpark_pandas_telemetry_method_decorator(
+                cast(
+                    # add a cast because mypy doesn't recognize that
+                    # non-None fget and __get__ are both callable
+                    # arguments to snowpark_pandas_telemetry_method_decorator.
+                    Callable,
+                    attr_value.__get__  # pragma: no cover: we don't encounter this case in pandas or modin because every property has an fget method.
+                    if attr_value.fget is None
+                    else attr_value.fget,
+                ),
+                property_name=attr_name,
+                property_method_type=PropertyMethodType.FGET,
+            ),
+            snowpark_pandas_telemetry_method_decorator(
+                attr_value.__set__ if attr_value.fset is None else attr_value.fset,
+                property_name=attr_name,
+                property_method_type=PropertyMethodType.FSET,
+            ),
+            snowpark_pandas_telemetry_method_decorator(
+                attr_value.__delete__ if attr_value.fdel is None else attr_value.fdel,
+                property_name=attr_name,
+                property_method_type=PropertyMethodType.FDEL,
+            ),
+            doc=attr_value.__doc__,
+        )
+    return attr_value
+
+
 class TelemetryMeta(type):
     def __new__(
         cls, name: str, bases: tuple, attrs: dict[str, Any]
@@ -536,43 +579,5 @@ def __new__(
             The modified class with decorated methods.
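Summarizing the join-key branch above as plain Python over lists: `outer`/`asof` coalesce the two keys, `right` keeps the right key, and `inner`/`left`/`coalesce` keep the left key; the new `no_join_applied` shortcut additionally keeps the left column untouched when both frames share a base and the key columns are identical. A sketch, with lists standing in for Snowpark columns:

```python
def pick_join_key(left_vals, right_vals, how):
    if how in ("asof", "outer"):
        # COALESCE(left, right): first non-null value wins
        return [l if l is not None else r for l, r in zip(left_vals, right_vals)]
    if how == "right":
        return list(right_vals)
    if how in ("inner", "left", "coalesce"):
        return list(left_vals)
    raise AssertionError(f"Unsupported join/align type {how}")

assert pick_join_key([1, None, 3], [None, 2, 30], "outer") == [1, 2, 3]
assert pick_join_key([1, None, 3], [None, 2, 30], "left") == [1, None, 3]
```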
""" for attr_name, attr_value in attrs.items(): - if callable(attr_value) and ( - not attr_name.startswith("_") - or (attr_name in TELEMETRY_PRIVATE_METHODS) - ): - attrs[attr_name] = snowpark_pandas_telemetry_method_decorator( - attr_value - ) - elif isinstance(attr_value, property): - # wrap on getter and setter - attrs[attr_name] = property( - snowpark_pandas_telemetry_method_decorator( - cast( - # add a cast because mypy doesn't recognize that - # non-None fget and __get__ are both callable - # arguments to snowpark_pandas_telemetry_method_decorator. - Callable, - attr_value.__get__ # pragma: no cover: we don't encounter this case in pandas or modin because every property has an fget method. - if attr_value.fget is None - else attr_value.fget, - ), - property_name=attr_name, - property_method_type=PropertyMethodType.FGET, - ), - snowpark_pandas_telemetry_method_decorator( - attr_value.__set__ - if attr_value.fset is None - else attr_value.fset, - property_name=attr_name, - property_method_type=PropertyMethodType.FSET, - ), - snowpark_pandas_telemetry_method_decorator( - attr_value.__delete__ - if attr_value.fdel is None - else attr_value.fdel, - property_name=attr_name, - property_method_type=PropertyMethodType.FDEL, - ), - doc=attr_value.__doc__, - ) + attrs[attr_name] = try_add_telemetry_to_attribute(attr_name, attr_value) return type.__new__(cls, name, bases, attrs) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py index 4860baf4acb..380fe965b4d 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py @@ -123,6 +123,11 @@ the specified time units. """ +AUTO_FORMAT_WARNING_MSG = """Snowflake automatic format detection is used when a format is not provided. +In this case Snowflake's auto format may yield different result values compared to pandas. +See https://docs.snowflake.com/en/sql-reference/date-time-input-output#supported-formats-for-auto-detection for details +""" + # TODO: SNOW-1127160: support other units VALID_TO_DATETIME_UNIT = ["D", "s", "ms", "us", "ns"] @@ -304,9 +309,7 @@ def generate_timestamp_col( if isinstance(datatype, (StringType, VariantType)): WarningMessage.mismatch_with_pandas( "to_datetime", - "Snowpark pandas to_datetime uses Snowflake's automatic format " - "detection to convert string to datetime when a format is not provided. 
" - "In this case Snowflake's auto format may yield different result values compared to pandas.", + AUTO_FORMAT_WARNING_MSG.replace("\n", ""), ) from snowflake.snowpark.modin.plugin._internal.type_utils import ( diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index e77cb99fa8f..50ce5e71310 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -198,6 +198,8 @@ ) from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame from snowflake.snowpark.modin.plugin._internal.groupby_utils import ( + GROUPBY_AGG_PRESERVES_SNOWPARK_PANDAS_TYPE, + GROUPBY_AGG_WITH_NONE_SNOWPARK_PANDAS_TYPES, check_is_groupby_supported_by_snowflake, extract_groupby_column_pandas_labels, get_frame_with_groupby_columns_as_index, @@ -386,6 +388,8 @@ class SnowflakeQueryCompiler(BaseQueryCompiler): this class is best explained by looking at https://github.com/modin-project/modin/blob/a8be482e644519f2823668210cec5cf1564deb7e/modin/experimental/core/storage_formats/hdk/query_compiler.py """ + lazy_execution = True + def __init__(self, frame: InternalFrame) -> None: """this stores internally a local pandas object (refactor this)""" assert frame is not None and isinstance( @@ -765,6 +769,7 @@ def execute(self) -> None: def to_numpy( self, dtype: Optional[npt.DTypeLike] = None, + copy: Optional[bool] = False, na_value: object = lib.no_default, **kwargs: Any, ) -> np.ndarray: @@ -772,6 +777,12 @@ def to_numpy( # i.e., for something like df.values internally to_numpy().flatten() is called # with flatten being another query compiler call into the numpy frontend layer. # here it's overwritten to actually perform numpy conversion, i.e. return an actual numpy object + if copy: + WarningMessage.ignored_argument( + operation="to_numpy", + argument="copy", + message="copy is ignored in Snowflake backend", + ) return self.to_pandas().to_numpy(dtype=dtype, na_value=na_value, **kwargs) def repartition(self, axis: Any = None) -> "SnowflakeQueryCompiler": @@ -1398,17 +1409,6 @@ def cache_result(self) -> "SnowflakeQueryCompiler": """ return SnowflakeQueryCompiler(self._modin_frame.persist_to_temporary_table()) - @property - def columns(self) -> native_pd.Index: - """ - Get pandas column labels. - - Returns: - an index containing all pandas column labels - """ - # TODO SNOW-837664: add more tests for df.columns - return self._modin_frame.data_columns_index - @snowpark_pandas_type_immutable_check def set_columns(self, new_pandas_labels: Axes) -> "SnowflakeQueryCompiler": """ @@ -1463,6 +1463,12 @@ def set_columns(self, new_pandas_labels: Axes) -> "SnowflakeQueryCompiler": ) return SnowflakeQueryCompiler(new_internal_frame) + # TODO SNOW-837664: add more tests for df.columns + def get_columns(self) -> native_pd.Index: + return self._modin_frame.data_columns_index + + columns: native_pd.Index = property(get_columns, set_columns) + def _shift_values( self, periods: int, axis: Union[Literal[0], Literal[1]], fill_value: Hashable ) -> "SnowflakeQueryCompiler": @@ -1575,12 +1581,9 @@ def _shift_values_axis_1( # Fill all columns with fill value (or NULL) if abs(periods) exceeds column count. 
if abs(periods) >= len(column_labels): - new_frame = frame.update_snowflake_quoted_identifiers_with_expressions( - { - quoted_identifier: pandas_lit(fill_value) - for quoted_identifier in frame.data_column_snowflake_quoted_identifiers - } - ).frame + new_frame = frame.apply_snowpark_function_to_columns( + lambda column: pandas_lit(fill_value) + ) return self.__constructor__(new_frame) # No fill with fill value when using periods == 0. Can be handled in frontend as well, @@ -2808,6 +2811,8 @@ def reset_index( Returns: A new SnowflakeQueryCompiler instance with updated index. """ + if allow_duplicates is no_default: + allow_duplicates = False # These levels will be moved from index columns to data columns levels_to_be_reset = self._modin_frame.parse_levels_to_integer_levels( level, allow_duplicates=False @@ -3008,9 +3013,11 @@ def first_last_valid_index( def sort_index( self, + *, axis: int, level: Optional[list[Union[str, int]]], ascending: Union[bool, list[bool]], + inplace: bool = False, kind: SortKind, na_position: NaPosition, sort_remaining: bool, @@ -3026,6 +3033,8 @@ def sort_index( level: If not None, sort on values in specified index level(s). ascending: A list of bools to represent ascending vs descending sort. Defaults to True. When the index is a MultiIndex the sort direction can be controlled for each level individually. + inplace: Whether or not the sort occurs in-place. This argument is ignored and only provided + for compatibility with Modin. kind: Choice of sorting algorithm. Perform stable sort if 'stable'. Defaults to unstable sort. Snowpark pandas ignores choice of sorting algorithm except 'stable'. na_position: Puts NaNs at the beginning if 'first'; 'last' puts NaNs at the end. Defaults to 'last' @@ -3231,8 +3240,6 @@ def validate_groupby( KeyError if a hashable label in by (groupby items) can not be found in the current dataframe ValueError if more than one column can be found for the groupby item """ - self._raise_not_implemented_error_for_timedelta() - validate_groupby_columns(self, by, axis, level) def groupby_ngroups( @@ -3328,8 +3335,6 @@ def groupby_agg( Returns: SnowflakeQueryCompiler: with a newly constructed internal dataframe """ - self._raise_not_implemented_error_for_timedelta() - level = groupby_kwargs.get("level", None) if agg_func in ["head", "tail"]: @@ -3422,12 +3427,27 @@ def convert_func_to_agg_func_info( ) # the pandas label and quoted identifier generated for each result column # after aggregation will be used as new pandas label and quoted identifiers. - new_data_column_pandas_labels = [ - col_agg_op.agg_pandas_label for col_agg_op in agg_col_ops - ] - new_data_column_quoted_identifier = [ - col_agg_op.agg_snowflake_quoted_identifier for col_agg_op in agg_col_ops - ] + new_data_column_pandas_labels = [] + new_data_column_quoted_identifiers = [] + new_data_column_snowpark_pandas_types = [] + for col_agg_op in agg_col_ops: + new_data_column_pandas_labels.append(col_agg_op.agg_pandas_label) + new_data_column_quoted_identifiers.append( + col_agg_op.agg_snowflake_quoted_identifier + ) + if agg_func in GROUPBY_AGG_PRESERVES_SNOWPARK_PANDAS_TYPE: + new_data_column_snowpark_pandas_types.append( + col_agg_op.data_type + if isinstance(col_agg_op.data_type, SnowparkPandasType) + else None + ) + elif agg_func in GROUPBY_AGG_WITH_NONE_SNOWPARK_PANDAS_TYPES: + # In the case where the aggregation overrides the type of the output data column + # (e.g. 
any always returns boolean data columns), set the output Snowpark pandas type to None + new_data_column_snowpark_pandas_types = None # type: ignore + else: + self._raise_not_implemented_error_for_timedelta() + new_data_column_snowpark_pandas_types = None # type: ignore # The ordering of the named aggregations is changed by us when we process # the agg_kwargs into the func dict (named aggregations on the same @@ -3441,10 +3461,14 @@ def convert_func_to_agg_func_info( # and the new_data_column_quoted_identifier. data_column_label_to_quoted_identifier = list( zip( - new_data_column_pandas_labels, new_data_column_quoted_identifier + new_data_column_pandas_labels, + new_data_column_quoted_identifiers, ) ) - new_data_column_pandas_labels, new_data_column_quoted_identifier = list( + ( + new_data_column_pandas_labels, + new_data_column_quoted_identifiers, + ) = list( zip( *[ pair @@ -3565,11 +3589,16 @@ def convert_func_to_agg_func_info( # original pandas label for data columns are still used as pandas labels data_column_pandas_labels=new_data_column_pandas_labels, data_column_pandas_index_names=new_data_column_index_names, - data_column_snowflake_quoted_identifiers=new_data_column_quoted_identifier, + data_column_snowflake_quoted_identifiers=new_data_column_quoted_identifiers, index_column_pandas_labels=new_index_column_pandas_labels, index_column_snowflake_quoted_identifiers=new_index_column_quoted_identifiers, - data_column_types=None, - index_column_types=None, + data_column_types=new_data_column_snowpark_pandas_types, + index_column_types=[ + internal_frame.snowflake_quoted_identifier_to_snowpark_pandas_type.get( + identifier + ) + for identifier in new_index_column_quoted_identifiers + ], ) ) @@ -4578,8 +4607,6 @@ def groupby_size( Returns: SnowflakeQueryCompiler: The result of groupby_size() """ - self._raise_not_implemented_error_for_timedelta() - level = groupby_kwargs.get("level", None) is_supported = check_is_groupby_supported_by_snowflake(by, level, axis) if not is_supported: @@ -4946,8 +4973,6 @@ def groupby_nunique( drop: bool = False, **kwargs: Any, ) -> "SnowflakeQueryCompiler": - self._raise_not_implemented_error_for_timedelta() - # We have to override the Modin version of this function because our groupby frontend passes the # ignored numeric_only argument to this query compiler method, and BaseQueryCompiler # does not have **kwargs. 
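The branch above applies the two lists introduced in `groupby_utils.py`: aggregations in `GROUPBY_AGG_PRESERVES_SNOWPARK_PANDAS_TYPE` carry the input's Snowpark pandas extension type (e.g. Timedelta) through to the result, while those in `GROUPBY_AGG_WITH_NONE_SNOWPARK_PANDAS_TYPES` always change the value type, so the result type is reset to `None`. As a standalone rule:

```python
PRESERVES_TYPE = {"min", "max", "sum", "mean", "median", "std", "first", "last"}
RESETS_TYPE = {"any", "all", "count", "idxmax", "idxmin", "size", "nunique"}

def result_column_type(agg_func, input_type):
    if agg_func in PRESERVES_TYPE:
        return input_type  # e.g. a Timedelta column stays Timedelta
    if agg_func in RESETS_TYPE:
        return None  # e.g. count always yields plain integers
    raise NotImplementedError(f"unsupported aggregation for extension types: {agg_func}")

assert result_column_type("min", "TimedeltaType") == "TimedeltaType"
assert result_column_type("count", "TimedeltaType") is None
```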
@@ -4971,7 +4996,6 @@ def groupby_any( drop: bool = False, **kwargs: Any, ) -> "SnowflakeQueryCompiler": - self._raise_not_implemented_error_for_timedelta() # We have to override the Modin version of this function because our groupby frontend passes the # ignored numeric_only argument to this query compiler method, and BaseQueryCompiler @@ -4996,7 +5020,6 @@ def groupby_all( drop: bool = False, **kwargs: Any, ) -> "SnowflakeQueryCompiler": - self._raise_not_implemented_error_for_timedelta() # We have to override the Modin version of this function because our groupby frontend passes the # ignored numeric_only argument to this query compiler method, and BaseQueryCompiler @@ -6257,14 +6280,8 @@ def to_timedelta( ErrorMessage.parameter_not_implemented_error("errors", "pd.to_timedelta") internal_frame = self._modin_frame col_ids = internal_frame.data_column_snowflake_quoted_identifiers - data_column_types = [TimedeltaType()] * len(col_ids) - - index_column_types = internal_frame.cached_index_column_snowpark_pandas_types if include_index: col_ids.extend(internal_frame.index_column_snowflake_quoted_identifiers) - index_column_types = [TimedeltaType()] * len( - internal_frame.index_column_snowflake_quoted_identifiers - ) # Raise error if the original data type is not numeric. id_to_type = internal_frame.quoted_identifier_to_snowflake_type(col_ids) @@ -6285,25 +6302,13 @@ def to_timedelta( if not col_ids: return self - internal_frame = ( + return SnowflakeQueryCompiler( internal_frame.update_snowflake_quoted_identifiers_with_expressions( - {col_id: col_to_timedelta(col(col_id), unit) for col_id in col_ids} + {col_id: col_to_timedelta(col(col_id), unit) for col_id in col_ids}, + [TimedeltaType()] * len(col_ids), ).frame ) - return SnowflakeQueryCompiler( - internal_frame.create( - ordered_dataframe=internal_frame.ordered_dataframe, - data_column_pandas_index_names=internal_frame.data_column_pandas_index_names, - data_column_pandas_labels=internal_frame.data_column_pandas_labels, - index_column_pandas_labels=internal_frame.index_column_pandas_labels, - data_column_snowflake_quoted_identifiers=internal_frame.data_column_snowflake_quoted_identifiers, - index_column_snowflake_quoted_identifiers=internal_frame.index_column_snowflake_quoted_identifiers, - data_column_types=data_column_types, - index_column_types=index_column_types, - ) - ) - def series_to_datetime( self, errors: DateTimeErrorChoices = "raise", @@ -8573,8 +8578,8 @@ def isna(self) -> "SnowflakeQueryCompiler": Boolean mask for self of whether an element at the corresponding position is NaN. """ - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: is_null(col_name) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: is_null(column) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -8588,8 +8593,8 @@ def notna(self) -> "SnowflakeQueryCompiler": Boolean mask for `self` of whether an element at the corresponding position is not NaN. 
""" - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: not_(is_null(col_name)) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: not_(is_null(column)) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -10631,9 +10636,9 @@ def dt_property( "minute": minute, "second": second, "day": dayofmonth, - "weekiso": (lambda column: builtin("weekiso")(col(column))), - "dayofweekiso": (lambda column: builtin("dayofweekiso")(col(column))), - "yearofweekiso": (lambda column: builtin("yearofweekiso")(col(column))), + "weekiso": (lambda column: builtin("weekiso")(column)), + "dayofweekiso": (lambda column: builtin("dayofweekiso")(column)), + "yearofweekiso": (lambda column: builtin("yearofweekiso")(column)), "month": month, "year": year, "quarter": quarter, @@ -10641,32 +10646,32 @@ def dt_property( # Use DAYOFWEEKISO for `dayofweek` so that the result doesn't # depend on the Snowflake session's WEEK_START parameter. Subtract # 1 to match pandas semantics. - "dayofweek": (lambda column: builtin("dayofweekiso")(col(column)) - 1), - "weekday": (lambda column: builtin("dayofweekiso")(col(column)) - 1), - "microsecond": (lambda column: floor(date_part("ns", col(column)) / 1000)), - "nanosecond": (lambda column: date_part("ns", col(column)) % 1000), + "dayofweek": (lambda column: builtin("dayofweekiso")(column) - 1), + "weekday": (lambda column: builtin("dayofweekiso")(column) - 1), + "microsecond": (lambda column: floor(date_part("ns", column) / 1000)), + "nanosecond": (lambda column: date_part("ns", column) % 1000), "is_month_start": ( - lambda column: coalesce(dayofmonth(col(column)) == 1, pandas_lit(False)) + lambda column: coalesce(dayofmonth(column) == 1, pandas_lit(False)) ), # To check if it's a month end, make sure that the following day is a month start. 
"is_month_end": ( lambda column: coalesce( - dayofmonth(dateadd("day", pandas_lit(1), col(column))) == 1, + dayofmonth(dateadd("day", pandas_lit(1), column)) == 1, pandas_lit(False), ) ), "is_quarter_start": ( lambda column: coalesce( - (dayofmonth(col(column)) == 1) - & (month(col(column)).in_(*QUARTER_START_MONTHS)), + (dayofmonth(column) == 1) + & (month(column).in_(*QUARTER_START_MONTHS)), pandas_lit(False), ) ), "is_quarter_end": ( lambda column: coalesce( - (dayofmonth(dateadd("day", pandas_lit(1), col(column))) == 1) + (dayofmonth(dateadd("day", pandas_lit(1), column)) == 1) & ( - month(dateadd("day", pandas_lit(1), col(column))).in_( + month(dateadd("day", pandas_lit(1), column)).in_( *QUARTER_START_MONTHS ) ), @@ -10675,13 +10680,13 @@ def dt_property( ), "is_year_start": ( lambda column: coalesce( - (dayofmonth(col(column)) == 1) & (month(col(column)) == 1), + (dayofmonth(column) == 1) & (month(column) == 1), pandas_lit(False), ) ), "is_year_end": ( lambda column: coalesce( - (dayofmonth(col(column)) == 31) & (month(col(column)) == 12), + (dayofmonth(column) == 31) & (month(column) == 12), pandas_lit(False), ) ), @@ -10691,7 +10696,7 @@ def dt_property( dateadd( "day", pandas_lit(1), - date_from_parts(year(col(column)), 2, 28), + date_from_parts(year(column), 2, 28), ) ) == 29, @@ -10699,9 +10704,9 @@ def dt_property( ) ), "days_in_month": ( - lambda column: when(col(column).is_null(), None) + lambda column: when(column.is_null(), None) .when( - month(col(column)).in_( + month(column).in_( pandas_lit(1), pandas_lit(3), pandas_lit(5), @@ -10713,7 +10718,7 @@ def dt_property( pandas_lit(31), ) .when( - month(col(column)).in_( + month(column).in_( pandas_lit(4), pandas_lit(6), pandas_lit(9), @@ -10727,7 +10732,7 @@ def dt_property( "day", pandas_lit(1), date_from_parts( - year(col(column)), pandas_lit(2), pandas_lit(28) + year(column), pandas_lit(2), pandas_lit(28) ), ) ) @@ -10744,21 +10749,12 @@ def dt_property( f"Snowpark pandas doesn't yet support the property '{class_prefix}.{property_name}'" ) # pragma: no cover - internal_frame = self._modin_frame - snowflake_ids = internal_frame.data_column_snowflake_quoted_identifiers[0:1] - if include_index: - snowflake_ids.extend( - internal_frame.index_column_snowflake_quoted_identifiers - ) - - internal_frame_with_property_column = ( - internal_frame.update_snowflake_quoted_identifiers_with_expressions( - {col_id: property_function(col_id) for col_id in snowflake_ids} + return SnowflakeQueryCompiler( + self._modin_frame.apply_snowpark_function_to_columns( + property_function, include_index ) ) - return SnowflakeQueryCompiler(internal_frame_with_property_column.frame) - def isin( self, values: Union[ @@ -10873,6 +10869,12 @@ def is_multiindex(self, *, axis: int = 0) -> bool: """ return self._modin_frame.is_multiindex(axis=axis) + def abs(self) -> "SnowflakeQueryCompiler": + return self.unary_op("abs") + + def negative(self) -> "SnowflakeQueryCompiler": + return self.unary_op("__neg__") + def unary_op(self, op: str) -> "SnowflakeQueryCompiler": """ Applies a unary operation `op` on each element of the `SnowflakeQueryCompiler`. @@ -10900,7 +10902,7 @@ def unary_op(self, op: str) -> "SnowflakeQueryCompiler": f"The unary operation {op} is currently not supported." 
) # pragma: no cover - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( lambda col_name: op_function(col_name) ) @@ -13953,17 +13955,17 @@ def round( for id in ids: id_to_decimal_dict[id] = decimals[label] - def round_col(col_name: ColumnOrName) -> SnowparkColumn: + def round_col(column: SnowparkColumn) -> SnowparkColumn: if is_scalar(decimals): - return snowpark_round(col_name, decimals) + return snowpark_round(column, decimals) elif is_dict_like(decimals): - if col_name in id_to_decimal_dict: - return snowpark_round(col_name, id_to_decimal_dict[col_name]) + if column.get_name() in id_to_decimal_dict: + return snowpark_round(column, id_to_decimal_dict[column.get_name()]) else: - return col(col_name) + return column - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: round_col(col_name) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: round_col(column) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14489,7 +14491,7 @@ def _str_startswith_endswith( raise TypeError(f"expected a string or tuple, not {type(pat).__name__}") def output_col( - col_name: ColumnOrName, pat: tuple, na: object + column: SnowparkColumn, pat: tuple, na: object ) -> SnowparkColumn: if all([not isinstance(p, str) for p in pat]): new_col = pandas_lit(np.nan) @@ -14499,14 +14501,14 @@ def output_col( new_pat = "|".join( f"{prefix}{re.escape(p)}{suffix}" for p in pat if isinstance(p, str) ) - new_col = col(col_name).rlike(pandas_lit(new_pat)) + new_col = column.rlike(pandas_lit(new_pat)) if any([not isinstance(p, str) for p in pat]): new_col = iff(new_col, pandas_lit(True), pandas_lit(None)) new_col = new_col if na is None else coalesce(new_col, pandas_lit(na)) - return self._replace_non_str(col(col_name), new_col, replacement_value=na) + return self._replace_non_str(column, new_col, replacement_value=na) - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: output_col(col_name, pat, na) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: output_col(column, pat, na) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14617,17 +14619,15 @@ def str_match( flags = flags | re.IGNORECASE params = self._get_regex_params(flags) - def output_col(col_name: ColumnOrName, pat: str, na: object) -> SnowparkColumn: - new_col = builtin("rlike")( - col(col_name), pandas_lit(pat), pandas_lit(params) - ) + def output_col(column: SnowparkColumn, pat: str, na: object) -> SnowparkColumn: + new_col = builtin("rlike")(column, pandas_lit(pat), pandas_lit(params)) new_col = ( new_col if pandas.isnull(na) else coalesce(new_col, pandas_lit(na)) ) - return self._replace_non_str(col(col_name), new_col, replacement_value=na) + return self._replace_non_str(column, new_col, replacement_value=na) - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: output_col(col_name, pat, na) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: output_col(column, pat, na) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14647,7 +14647,7 @@ def str_capitalize(self) -> "SnowflakeQueryCompiler": SnowflakeQueryCompiler representing result of the string operation. 
""" - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( # We use delimeters and set it as the empty string so that we treat the entire string as one word # and thus only capitalize the first character of the first word lambda col: self._replace_non_str( @@ -14670,10 +14670,8 @@ def str_isdigit(self) -> "SnowflakeQueryCompiler": ------- SnowflakeQueryCompiler representing result of the string operation. """ - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: self._replace_non_str( - col(col_name), col(col_name).rlike("[0-9]+") - ) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: self._replace_non_str(column, column.rlike("[0-9]+")) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14688,12 +14686,12 @@ def str_islower(self) -> "SnowflakeQueryCompiler": ------- SnowflakeQueryCompiler representing result of the string operation. """ - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: self._replace_non_str( - col(col_name), - col(col_name) - .rlike("(.|\n)*[a-zA-Z]+(.|\n)*") - .__and__(col(col_name).__eq__(lower(col_name))), + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: self._replace_non_str( + column, + column.rlike("(.|\n)*[a-zA-Z]+(.|\n)*").__and__( + column.__eq__(lower(column)) + ), ) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14706,12 +14704,12 @@ def str_isupper(self) -> "SnowflakeQueryCompiler": ------- SnowflakeQueryCompiler representing result of the string operation. """ - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: self._replace_non_str( - col(col_name), - col(col_name) - .rlike("(.|\n)*[a-zA-Z]+(.|\n)*") - .__and__(col(col_name).__eq__(upper(col_name))), + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: self._replace_non_str( + column, + column.rlike("(.|\n)*[a-zA-Z]+(.|\n)*").__and__( + column.__eq__(upper(column)) + ), ) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14730,12 +14728,10 @@ def str_istitle(self) -> "SnowflakeQueryCompiler": ------- SnowflakeQueryCompiler representing result of the string operation. """ - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_identifier: self._replace_non_str( - col(col_identifier), - col(col_identifier).rlike( - "^([^a-zA-Z]*[A-Z]{1}[a-z]*([^a-zA-Z]|$)+)+$" - ), + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: self._replace_non_str( + column, + column.rlike("^([^a-zA-Z]*[A-Z]{1}[a-z]*([^a-zA-Z]|$)+)+$"), ) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14754,8 +14750,8 @@ def str_lower(self) -> "SnowflakeQueryCompiler": ------- SnowflakeQueryCompiler representing result of the string operation. """ - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: self._replace_non_str(col(col_name), lower(col_name)) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: self._replace_non_str(column, lower(column)) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14767,8 +14763,8 @@ def str_upper(self) -> "SnowflakeQueryCompiler": ------- SnowflakeQueryCompiler representing result of the string operation. 
""" - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: self._replace_non_str(col(col_name), upper(col_name)) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: self._replace_non_str(column, upper(column)) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14781,7 +14777,7 @@ def str_title(self) -> "SnowflakeQueryCompiler": SnowflakeQueryCompiler representing result of the string operation. """ - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( # Capitalize the first character of each word lambda col: self._replace_non_str(col, initcap(col)) ) @@ -14888,16 +14884,14 @@ def str_contains( flags = flags | re.IGNORECASE params = self._get_regex_params(flags) - def output_col(col_name: ColumnOrName) -> SnowparkColumn: - new_col = builtin("rlike")( - col(col_name), pandas_lit(pat), pandas_lit(params) - ) + def output_col(column: SnowparkColumn) -> SnowparkColumn: + new_col = builtin("rlike")(column, pandas_lit(pat), pandas_lit(params)) new_col = ( new_col if pandas.isnull(na) else coalesce(new_col, pandas_lit(na)) ) - return self._replace_non_str(col(col_name), new_col, replacement_value=na) + return self._replace_non_str(column, new_col, replacement_value=na) - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( output_col ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14925,18 +14919,18 @@ def str_count( """ params = self._get_regex_params(flags) - def output_col(col_name: ColumnOrName) -> SnowparkColumn: + def output_col(column: SnowparkColumn) -> SnowparkColumn: if pat == "": # Special case to handle empty search pattern. # Snowflake's regexp_count returns 0, while pandas returns string length + 1. 
- new_col = length(col(col_name)) + 1 + new_col = length(column) + 1 else: new_col = builtin("regexp_count")( - col(col_name), pandas_lit(pat), 1, pandas_lit(params) + column, pandas_lit(pat), 1, pandas_lit(params) ) - return self._replace_non_str(col(col_name), new_col) + return self._replace_non_str(column, new_col) - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( output_col ) return SnowflakeQueryCompiler(new_internal_frame) @@ -14961,8 +14955,8 @@ def str_get(self, i: int) -> "SnowflakeQueryCompiler": "Snowpark pandas method 'Series.str.get' doesn't yet support non-numeric 'i' argument" ) - def output_col(col_name: ColumnOrName) -> SnowparkColumn: - col_len_exp = length(col(col_name)) + def output_col(column: SnowparkColumn) -> SnowparkColumn: + col_len_exp = length(column) if i is None: new_col = pandas_lit(None) elif i < 0: @@ -14972,9 +14966,7 @@ def output_col(col_name: ColumnOrName) -> SnowparkColumn: new_col = iff( pandas_lit(i) + col_len_exp < pandas_lit(0), pandas_lit(None), - substring( - col(col_name), pandas_lit(i + 1) + col_len_exp, pandas_lit(1) - ), + substring(column, pandas_lit(i + 1) + col_len_exp, pandas_lit(1)), ) else: assert i >= 0 @@ -14984,11 +14976,11 @@ def output_col(col_name: ColumnOrName) -> SnowparkColumn: new_col = iff( pandas_lit(i) >= col_len_exp, pandas_lit(None), - substring(col(col_name), pandas_lit(i + 1), pandas_lit(1)), + substring(column, pandas_lit(i + 1), pandas_lit(1)), ) - return self._replace_non_str(col(col_name), new_col) + return self._replace_non_str(column, new_col) - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( output_col ) return SnowflakeQueryCompiler(new_internal_frame) @@ -15028,7 +15020,7 @@ def str_len(self, **kwargs: Any) -> "SnowflakeQueryCompiler": """ # TODO SNOW-1438001: Handle dict, list, and tuple values for Series.str.len(). return SnowflakeQueryCompiler( - self._modin_frame.apply_snowpark_function_to_data_columns( + self._modin_frame.apply_snowpark_function_to_columns( lambda col: self._replace_non_str(col, length(col)) ) ) @@ -15066,14 +15058,14 @@ def str_slice( """ def output_col( - col_name: ColumnOrName, + column: SnowparkColumn, start: Optional[int], stop: Optional[int], step: Optional[int], ) -> SnowparkColumn: if step is None: step = 1 - col_len_exp = length(col(col_name)) + col_len_exp = length(column) # In what follows, we define the expressions needed to evaluate the correct start and stop positions for a slice. # In general, the start position needs to be included and the stop position needs to be excluded from the slice. @@ -15139,12 +15131,12 @@ def output_col( if step < 0: # When step is negative, we flip the column string value along with the start and # stop positions. Step can be considered positive now. - new_col = reverse(col(col_name)) + new_col = reverse(column) start_exp = col_len_exp - start_exp + pandas_lit(1) stop_exp = col_len_exp - stop_exp + pandas_lit(1) step = -step else: - new_col = col(col_name) + new_col = column # End of evaluation for start and end positions. # If step is 1, then slicing is no different than getting a substring. 
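The negative-step branch in the hunk below reduces the problem to a positive step by reversing the string and flipping the 1-based start and stop positions, which is what the `reverse(column)` call and the recomputed `start_exp`/`stop_exp` expressions implement. As a sanity check, here is an editorial pure-Python sketch of the same identity; `take_1based` is a hypothetical helper assuming 1-based, start-inclusive/stop-exclusive positions, and is not part of the patch:

```python
def take_1based(s: str, start: int, stop: int, step: int) -> str:
    # Mirror of the column expressions below: flip the string and both
    # positions so that a negative step becomes a positive one.
    if step < 0:
        n = len(s)
        s, start, stop, step = s[::-1], n - start + 1, n - stop + 1, -step
    # With a positive step, slicing is substring extraction plus a stride.
    return s[start - 1 : stop - 1 : step]


# Positions 6 and 4 of "abcdefg" (stepping by -2) are "f" and "d":
assert take_1based("abcdefg", 6, 2, -2) == "fd"
assert "abcdefg"[5:1:-2] == "fd"  # the equivalent native Python slice
```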
@@ -15170,10 +15162,10 @@ def output_col(
                 pandas_lit(f"((.|\n)(.|\n){{{step-1}}})"),
                 pandas_lit("\\2"),
             )
-            return self._replace_non_str(col(col_name), new_col)
+            return self._replace_non_str(column, new_col)

-        new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns(
-            lambda col_name: output_col(col_name, start, stop, step)
+        new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns(
+            lambda column: output_col(column, start, stop, step)
         )
         return SnowflakeQueryCompiler(new_internal_frame)
@@ -15241,7 +15233,7 @@ def str_split(
         )

         def output_col(
-            col_name: ColumnOrName, pat: Optional[str], n: int
+            column: SnowparkColumn, pat: Optional[str], n: int
         ) -> SnowparkColumn:
             if pandas.isnull(pat):
                 # When pat is null, it means we need to split on whitespace.
@@ -15252,22 +15244,20 @@ def output_col(
                 regex_pat = r"\s+"
                 regex_pat_as_prefix = r"\s+.*"
                 new_col = builtin("regexp_replace")(
-                    builtin("trim")(col(col_name), pandas_lit(whitespace_chars)),
+                    builtin("trim")(column, pandas_lit(whitespace_chars)),
                     pandas_lit(regex_pat),
                     pandas_lit(" "),
                 )
                 n_for_split_idx = iff(
-                    builtin("regexp_like")(
-                        col(col_name), pandas_lit(regex_pat_as_prefix)
-                    ),
+                    builtin("regexp_like")(column, pandas_lit(regex_pat_as_prefix)),
                     pandas_lit(n + 1),
                     pandas_lit(n),
                 )
             else:
                 new_pat = str(pat)
                 regex_pat = re.escape(str(pat))
-                new_col = col(col_name)
+                new_col = column
                 n_for_split_idx = pandas_lit(n)

             if np.isnan(n):
@@ -15288,7 +15278,7 @@ def output_col(
             # it will not be split and will remain intact irrespective of the number of
             # delimiter occurrences it has.
             split_idx = builtin("regexp_instr")(
-                col(col_name), pandas_lit(regex_pat), 1, n_for_split_idx, 1
+                column, pandas_lit(regex_pat), 1, n_for_split_idx, 1
             )
             new_col = iff(
                 builtin("array_size")(
@@ -15303,20 +15293,20 @@ def output_col(
                         pandas_lit(n),
                     ),
                     builtin("substr")(
-                        col(col_name),
+                        column,
                         split_idx,
                     ),
                 ),
             )
             if pandas.isnull(pat):
                 new_col = iff(
-                    builtin("regexp_like")(col(col_name), pandas_lit(r"\s*")),
+                    builtin("regexp_like")(column, pandas_lit(r"\s*")),
                     pandas_lit([]),
                     new_col,
                 )
-            return self._replace_non_str(col(col_name), new_col)
+            return self._replace_non_str(column, new_col)

-        new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns(
+        new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns(
             lambda col_name: output_col(col_name, pat, n)
         )
         return SnowflakeQueryCompiler(new_internal_frame)
@@ -15390,7 +15380,7 @@ def str_replace(
             flags = flags | re.IGNORECASE

         def output_col(
-            col_name: ColumnOrName, pat: str, n: int, flags: int
+            column: SnowparkColumn, pat: str, n: int, flags: int
         ) -> SnowparkColumn:
             if regex or (case is not None and not case) or n > 0:
                 # Here we handle the cases where SQL's regexp_replace rather than SQL's replace
@@ -15400,14 +15390,10 @@ def output_col(
                 params = self._get_regex_params(flags)
                 if n < 0:
                     # Replace all occurrences.
-                    new_col = builtin("regexp_replace")(
-                        col(col_name), pat, repl, 1, 0, params
-                    )
+                    new_col = builtin("regexp_replace")(column, pat, repl, 1, 0, params)
                 elif n == 1:
                     # Replace first occurrence.
-                    new_col = builtin("regexp_replace")(
-                        col(col_name), pat, repl, 1, 1, params
-                    )
+                    new_col = builtin("regexp_replace")(column, pat, repl, 1, 1, params)
                 else:
                     # Replace the first n occurrences through these steps:
                     # (1) Find the index of the nth occurrence (if present).
                     # (2) Divide the string into two parts at this index.
                     # (3) Replace all occurrences in the left part and leave right part unchanged.
                     # (4) Concat left and right parts.
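                    # Editorial worked example (not part of the patch): with value
                    # "a-b-c-d", pat="-", repl=" ", n=2: REGEXP_INSTR(..., 1, 2, 1)
                    # returns 5 (1-based position just past the 2nd "-"), so
                    # split_idx = 4; LEFT gives "a-b-", which becomes "a b " after
                    # replacement, RIGHT gives "c-d" unchanged, and CONCAT yields
                    # "a b c-d", matching pandas' str.replace("-", " ", n=2).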
split_idx = iff( - builtin("regexp_instr")(col(col_name), pat, 1, 1, 1, params) - == 0, + builtin("regexp_instr")(column, pat, 1, 1, 1, params) == 0, 0, iff( - builtin("regexp_instr")(col(col_name), pat, 1, n, 1, params) - == 0, - builtin("len")(col(col_name)) + 1, - builtin("regexp_instr")( - col(col_name), pat, 1, n, 1, params - ), + builtin("regexp_instr")(column, pat, 1, n, 1, params) == 0, + builtin("len")(column) + 1, + builtin("regexp_instr")(column, pat, 1, n, 1, params), ) - 1, ) new_col = builtin("concat")( builtin("regexp_replace")( - builtin("left")(col(col_name), split_idx), + builtin("left")(column, split_idx), pat, repl, 1, 0, params, ), - builtin("right")( - col(col_name), builtin("len")(col(col_name)) - split_idx - ), + builtin("right")(column, builtin("len")(column) - split_idx), ) else: # Replace all occurrences using SQL's replace. - new_col = builtin("replace")(col(col_name), pat, repl) - return self._replace_non_str(col(col_name), new_col) + new_col = builtin("replace")(column, pat, repl) + return self._replace_non_str(column, new_col) - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: output_col(col_name, pat, n, flags) + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( + lambda column: output_col(column, pat, n, flags) ) return SnowflakeQueryCompiler(new_internal_frame) @@ -15488,11 +15468,11 @@ def _str_strip_variant( if to_strip is None: to_strip = "\t\n\r\f " - def output_col(col_name: ColumnOrName) -> SnowparkColumn: - new_col = sp_func(col(col_name), pandas_lit(to_strip)) - return self._replace_non_str(col(col_name), new_col) + def output_col(column: SnowparkColumn) -> SnowparkColumn: + new_col = sp_func(column, pandas_lit(to_strip)) + return self._replace_non_str(column, new_col) - new_internal_frame = self._modin_frame.apply_snowpark_function_to_data_columns( + new_internal_frame = self._modin_frame.apply_snowpark_function_to_columns( output_col ) return SnowflakeQueryCompiler(new_internal_frame) @@ -15613,9 +15593,9 @@ def str_translate(self, table: dict) -> "SnowflakeQueryCompiler": source_alphabet = "".join(single_char_pairs.keys()) + "".join(none_keys) target_alphabet = "".join(single_char_pairs.values()) return SnowflakeQueryCompiler( - self._modin_frame.apply_snowpark_function_to_data_columns( - lambda col_name: translate( - col(col_name), + self._modin_frame.apply_snowpark_function_to_columns( + lambda column: translate( + column, pandas_lit(source_alphabet), pandas_lit(target_alphabet), ) @@ -16176,25 +16156,17 @@ def dt_ceil( if slice_unit not in SUPPORTED_DT_FLOOR_CEIL_FREQS: ErrorMessage.parameter_not_implemented_error(f"freq='{freq}'", method_name) - def ceil_func(col_id: str) -> SnowparkColumn: - base_column = col(col_id) + def ceil_func(column: SnowparkColumn) -> SnowparkColumn: floor_column = builtin("time_slice")( - base_column, slice_length, slice_unit, "START" - ) - ceil_column = builtin("time_slice")( - base_column, slice_length, slice_unit, "END" + column, slice_length, slice_unit, "START" ) - return iff(base_column.equal_null(floor_column), base_column, ceil_column) - - frame = self._modin_frame - snowflake_ids = frame.data_column_snowflake_quoted_identifiers[0:1] - if include_index: - snowflake_ids.extend(frame.index_column_snowflake_quoted_identifiers) + ceil_column = builtin("time_slice")(column, slice_length, slice_unit, "END") + return iff(column.equal_null(floor_column), column, ceil_column) return SnowflakeQueryCompiler( - 
frame.update_snowflake_quoted_identifiers_with_expressions( - {col_id: ceil_func(col_id) for col_id in snowflake_ids} - ).frame + self._modin_frame.apply_snowpark_function_to_columns( + ceil_func, include_index + ) ) def dt_round( @@ -16270,19 +16242,15 @@ def slice_length_when_unit_is_second(slice_length: int, slice_unit: str) -> int: slice_length, slice_unit = down_level_freq(slice_length, slice_unit) return slice_length - def round_func(col_id: str) -> SnowparkColumn: - base_column = col(col_id) - + def round_func(column: SnowparkColumn) -> SnowparkColumn: # Second, we determine whether floor represents an even number of slices. # To do so, we must divide the number of epoch seconds in it over the number # of epoch seconds in one slice. This way, we can get the number of slices. floor_column = builtin("time_slice")( - base_column, slice_length, slice_unit, "START" - ) - ceil_column = builtin("time_slice")( - base_column, slice_length, slice_unit, "END" + column, slice_length, slice_unit, "START" ) + ceil_column = builtin("time_slice")(column, slice_length, slice_unit, "END") floor_epoch_seconds_column = builtin("extract")( "epoch_second", floor_column @@ -16307,7 +16275,7 @@ def round_func(col_id: str) -> SnowparkColumn: # In case the date value is not at half point of the slice, then we shift it # by half a slice, and take the floor from there. base_plus_half_slice_column = dateadd( - slice_unit, pandas_lit(half_slice_length), base_column + slice_unit, pandas_lit(half_slice_length), column ) round_column_if_not_half_point = builtin("time_slice")( base_plus_half_slice_column, slice_length, slice_unit, "START" @@ -16320,15 +16288,10 @@ def round_func(col_id: str) -> SnowparkColumn: round_column_if_not_half_point, ) - frame = self._modin_frame - snowflake_ids = frame.data_column_snowflake_quoted_identifiers[0:1] - if include_index: - snowflake_ids.extend(frame.index_column_snowflake_quoted_identifiers) - return SnowflakeQueryCompiler( - frame.update_snowflake_quoted_identifiers_with_expressions( - {col_id: round_func(col_id) for col_id in snowflake_ids} - ).frame + self._modin_frame.apply_snowpark_function_to_columns( + round_func, include_index + ) ) def dt_floor( @@ -16371,18 +16334,13 @@ def dt_floor( if slice_unit not in SUPPORTED_DT_FLOOR_CEIL_FREQS: ErrorMessage.parameter_not_implemented_error(f"freq='{freq}'", method_name) - frame = self._modin_frame - snowflake_ids = frame.data_column_snowflake_quoted_identifiers[0:1] - if include_index: - snowflake_ids.extend(frame.index_column_snowflake_quoted_identifiers) + def floor_func(column: SnowparkColumn) -> SnowparkColumn: + return builtin("time_slice")(column, slice_length, slice_unit) return SnowflakeQueryCompiler( - frame.update_snowflake_quoted_identifiers_with_expressions( - { - col_id: builtin("time_slice")(col(col_id), slice_length, slice_unit) - for col_id in snowflake_ids - } - ).frame + self._modin_frame.apply_snowpark_function_to_columns( + floor_func, include_index + ) ) def dt_normalize(self, include_index: bool = False) -> "SnowflakeQueryCompiler": @@ -16397,20 +16355,14 @@ def dt_normalize(self, include_index: bool = False) -> "SnowflakeQueryCompiler": BaseQueryCompiler New QueryCompiler containing date-time values with midnight time. 
""" - internal_frame = self._modin_frame - def normalize_column(col_id: str) -> SnowparkColumn: - return builtin("date_trunc")("d", col(col_id)) + def normalize_column(column: SnowparkColumn) -> SnowparkColumn: + return builtin("date_trunc")("d", column) - snowflake_ids = internal_frame.data_column_snowflake_quoted_identifiers[0:1] - if include_index: - snowflake_ids.extend( - internal_frame.index_column_snowflake_quoted_identifiers - ) return SnowflakeQueryCompiler( - internal_frame.update_snowflake_quoted_identifiers_with_expressions( - {col_id: normalize_column(col_id) for col_id in snowflake_ids} - ).frame + self._modin_frame.apply_snowpark_function_to_columns( + normalize_column, include_index + ) ) def dt_month_name( @@ -16429,7 +16381,6 @@ def dt_month_name( ErrorMessage.parameter_not_implemented_error( "locale", f"{class_name}.month_name" ) - internal_frame = self._modin_frame # The following generates a mapping list of the form: # [1, "January", 2, "February", ..., 12, "December"] @@ -16438,25 +16389,15 @@ def dt_month_name( for i in range(2, 26) ] - snowflake_ids = internal_frame.data_column_snowflake_quoted_identifiers[0:1] - if include_index: - snowflake_ids.extend( - internal_frame.index_column_snowflake_quoted_identifiers - ) + def month_name_func(column: SnowparkColumn) -> SnowparkColumn: + return builtin("decode")(builtin("extract")("month", column), *mapping_list) - internal_frame = ( - internal_frame.update_snowflake_quoted_identifiers_with_expressions( - { - col_id: builtin("decode")( - builtin("extract")("month", col(col_id)), *mapping_list - ) - for col_id in snowflake_ids - } + return SnowflakeQueryCompiler( + self._modin_frame.apply_snowpark_function_to_columns( + month_name_func, include_index ) ) - return SnowflakeQueryCompiler(internal_frame.frame) - def dt_day_name( self, locale: Optional[str] = None, include_index: bool = False ) -> "SnowflakeQueryCompiler": @@ -16473,7 +16414,6 @@ def dt_day_name( ErrorMessage.parameter_not_implemented_error( "locale", f"{class_name}.day_name" ) - internal_frame = self._modin_frame # The following generates a mapping list of the form: # [1, "Monday", 2, "Tuesday", ..., 7, "Sunday"] @@ -16482,25 +16422,15 @@ def dt_day_name( for i in range(0, 14) ] - snowflake_ids = internal_frame.data_column_snowflake_quoted_identifiers[0:1] - if include_index: - snowflake_ids.extend( - internal_frame.index_column_snowflake_quoted_identifiers - ) + def day_name_func(column: SnowparkColumn) -> SnowparkColumn: + return builtin("decode")(builtin("dayofweekiso")(column), *mapping_list) - internal_frame = ( - internal_frame.update_snowflake_quoted_identifiers_with_expressions( - { - col_id: builtin("decode")( - builtin("dayofweekiso")(col(col_id)), *mapping_list - ) - for col_id in snowflake_ids - } + return SnowflakeQueryCompiler( + self._modin_frame.apply_snowpark_function_to_columns( + day_name_func, include_index ) ) - return SnowflakeQueryCompiler(internal_frame.frame) - def dt_total_seconds(self) -> None: """ Return total duration of each element expressed in seconds. 
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py index a6a0aff1af4..3ba4f2f2dab 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py @@ -386,6 +386,25 @@ Series([], dtype: bool) """ +_get_set_index_doc = """ +{desc} + +{parameters_or_returns} + +Note +---- +When setting `DataFrame.index` or `Series.index` where the length of the +`Series`/`DataFrame` object does not match with the new index's length, +pandas raises a ValueError. Snowpark pandas does not raise this error; +this operation is valid. +When the `Series`/`DataFrame` object is longer than the new index, +the `Series`/`DataFrame`'s new index is filled with `NaN` values for +the "extra" elements. When the `Series`/`DataFrame` object is shorter than +the new index, the extra values in the new index are ignored—`Series` and +`DataFrame` stay the same length `n`, and use only the first `n` values of +the new index. +""" + class BasePandasDataset: """ @@ -3594,3 +3613,21 @@ def __array_function__(): BasePandasDataset The result of the ufunc applied to the `BasePandasDataset`. """ + + @doc( + _get_set_index_doc, + desc="Get the index for this `Series`/`DataFrame`.", + parameters_or_returns="Returns\n-------\nIndex\n The index for this `Series`/`DataFrame`.", + ) + def _get_index(): + pass + + @doc( + _get_set_index_doc, + desc="Set the index for this `Series`/`DataFrame`.", + parameters_or_returns="Parameters\n----------\nnew_index : Index\n The new index to set.", + ) + def _set_index(): + pass + + index = property(_get_index, _set_index) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py index a42ef48eb94..f0c02aa0e65 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py @@ -13,6 +13,8 @@ _shared_docs, ) +from .base import BasePandasDataset + _doc_binary_op_kwargs = {"returns": "BasePandasDataset", "left": "BasePandasDataset"} @@ -49,7 +51,7 @@ } -class DataFrame: +class DataFrame(BasePandasDataset): """ Snowpark pandas representation of ``pandas.DataFrame`` with a lazily-evaluated relational dataset. @@ -3832,6 +3834,18 @@ def set_index(): DataFrame or None Changed row labels or None if ``inplace=True``. + Note + ---- + When performing ``DataFrame.set_index`` where the length of the + :class:`DataFrame` object does not match with the new index's length, + a ``ValueError`` is not raised. When the :class:`DataFrame` object is + longer than the new index, the :class:`DataFrame`'s new index is filled + with ``NaN`` values for the "extra" elements. When the :class:`DataFrame` + object is shorter than the new index, the extra values in the new index + are ignored—the :class:`DataFrame` stays the same length ``n``, + and uses only the first ``n`` values of the new index. + + See Also -------- DataFrame.reset_index : Opposite of set_index. 
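Since the Note added above relaxes a long-standing pandas contract, a concrete illustration helps. The snippet below is editorial and mirrors the documented behavior rather than an executed doctest; it assumes a configured Snowflake session with the Snowpark pandas plugin imported:

```python
import modin.pandas as pd
import snowflake.snowpark.modin.plugin  # noqa: F401  # activates Snowpark pandas

df = pd.DataFrame({"a": [1, 2, 3]})
df.index = pd.Index([10, 20])  # new index shorter than the frame: native pandas
# raises ValueError here; per the Note, Snowpark pandas instead labels the
# third row with NaN.

df2 = pd.DataFrame({"a": [1, 2]})
df2.index = pd.Index([10, 20, 30])  # new index longer than the frame: the extra
# label 30 is ignored and only the first two values of the new index are used.
```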
diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/resample.py b/src/snowflake/snowpark/modin/plugin/docstrings/resample.py index b152fb9ed45..a1414b1ce18 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/resample.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/resample.py @@ -200,7 +200,7 @@ def ffill(): 2020-01-06 3 Freq: None, dtype: int64 - >>> lst2 = pd.to_datetime(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10']) + >>> lst2 = pd.to_datetime(pd.Index(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10'])) >>> ser2 = pd.Series([1, 2, 3, 4, None, 6], index=lst2) >>> ser2 2023-01-03 01:00:00 1.0 @@ -257,7 +257,7 @@ def ffill(): 2020-01-03 0 15 2020-01-06 2 17 - >>> index2 = pd.to_datetime(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10']) + >>> index2 = pd.to_datetime(pd.Index(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10'])) >>> df2 = pd.DataFrame({'a': range(len(index2)), ... 'b': range(len(index2) + 10, len(index2) * 2 + 10)}, ... index=index2) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 6e48a7e57f3..4878c82635a 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -15,6 +15,8 @@ ) from snowflake.snowpark.modin.utils import _create_operator_docstring +from .base import BasePandasDataset + _shared_doc_kwargs = { "axes": "index", "klass": "Series", @@ -35,7 +37,7 @@ } -class Series: +class Series(BasePandasDataset): """ Snowpark pandas representation of `pandas.Series` with a lazily-evaluated relational dataset. diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py b/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py new file mode 100644 index 00000000000..496136d736e --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py @@ -0,0 +1,46 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +""" +File containing BasePandasDataset APIs defined in Snowpark pandas but not the Modin API layer. +""" + +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_method_decorator, +) + +from .base_overrides import register_base_override + + +@register_base_override("__array_function__") +@snowpark_pandas_telemetry_method_decorator +def __array_function__(self, func: callable, types: tuple, args: tuple, kwargs: dict): + """ + Apply the `func` to the `BasePandasDataset`. + + Parameters + ---------- + func : np.func + The NumPy func to apply. + types : tuple + The types of the args. + args : tuple + The args to the func. + kwargs : dict + Additional keyword arguments. + + Returns + ------- + BasePandasDataset + The result of the ufunc applied to the `BasePandasDataset`. 
+    """
+    from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import (
+        numpy_to_pandas_func_map,
+    )
+
+    if func.__name__ in numpy_to_pandas_func_map:
+        return numpy_to_pandas_func_map[func.__name__](*args, **kwargs)
+    else:
+        # Per NEP 18, return NotImplemented (rather than raising) so that NumPy
+        # can fall back to other implementations.
+        return NotImplemented  # pragma: no cover
diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py
index 332df757787..abbcb9bc762 100644
--- a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py
+++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py
@@ -6,31 +6,123 @@
 Methods defined on BasePandasDataset that are overridden in Snowpark pandas.
 Adding a method to this file should be done with discretion, and only when relevant changes
 cannot be made to the query compiler or upstream frontend to accommodate Snowpark pandas.
+
+If you must override a method in this file, please add a comment describing why it must be overridden,
+and if possible, whether this can be reconciled with upstream Modin.
 """
 from __future__ import annotations

 import pickle as pkl
-from typing import Any
+import warnings
+from collections.abc import Sequence
+from typing import Any, Callable, Hashable, Literal, Mapping, get_args

+import modin.pandas as pd
 import numpy as np
+import numpy.typing as npt
 import pandas
 from modin.pandas.base import BasePandasDataset
-from pandas._libs.lib import no_default
+from pandas._libs import lib
+from pandas._libs.lib import NoDefault, is_bool, no_default
 from pandas._typing import (
+    AggFuncType,
+    AnyArrayLike,
+    Axes,
     Axis,
     CompressionOptions,
+    FillnaOptions,
+    IgnoreRaise,
+    IndexKeyFunc,
+    IndexLabel,
+    Level,
+    NaPosition,
+    RandomState,
+    Scalar,
     StorageOptions,
     TimedeltaConvertibleTypes,
+    TimestampConvertibleTypes,
+)
+from pandas.core.common import apply_if_callable
+from pandas.core.dtypes.common import (
+    is_dict_like,
+    is_dtype_equal,
+    is_list_like,
+    is_numeric_dtype,
+    pandas_dtype,
+)
+from pandas.core.dtypes.inference import is_integer
+from pandas.core.methods.describe import _refine_percentiles
+from pandas.errors import SpecificationError
+from pandas.util._validators import (
+    validate_ascending,
+    validate_bool_kwarg,
+    validate_percentile,
 )

+import snowflake.snowpark.modin.pandas as spd
 from snowflake.snowpark.modin.pandas.api.extensions import (
     register_dataframe_accessor,
     register_series_accessor,
 )
+from snowflake.snowpark.modin.pandas.utils import (
+    ensure_index,
+    extract_validate_and_try_convert_named_aggs_from_kwargs,
+    get_as_shape_compatible_dataframe_or_series,
+    is_scalar,
+    raise_if_native_pandas_objects,
+    validate_and_try_convert_agg_func_arg_func_to_str,
+)
 from snowflake.snowpark.modin.plugin._internal.telemetry import (
     snowpark_pandas_telemetry_method_decorator,
+    try_add_telemetry_to_attribute,
+)
+from snowflake.snowpark.modin.plugin._typing import ListLike
+from snowflake.snowpark.modin.plugin.utils.error_message import (
+    ErrorMessage,
+    base_not_implemented,
 )
-from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented
+from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage
+from snowflake.snowpark.modin.utils import validate_int_kwarg
+
+
+def register_base_override(method_name: str):
+    """
+    Decorator to override a method on BasePandasDataset.
+
+    Since Modin does not provide a mechanism for directly overriding methods on
+    BasePandasDataset, we mock this by performing the override on DataFrame and Series,
+    and manually performing a `setattr` on the base class. These steps are necessary to
+    allow both the docstring extension and method dispatch to work properly.
+
+    Methods registered here are also automatically instrumented with Snowpark pandas
+    telemetry.
+    """
+
+    def decorator(base_method: Any):
+        base_method = try_add_telemetry_to_attribute(method_name, base_method)
+        parent_method = getattr(BasePandasDataset, method_name, None)
+        if isinstance(parent_method, property):
+            parent_method = parent_method.fget
+        # If the method was not defined on Series/DataFrame and is instead inherited
+        # from BasePandasDataset, we must register the override on the subclass as well,
+        # since the inherited attribute has already been resolved against the superclass.
+        # TODO: SNOW-1063347
+        # Since we still use the vendored version of Series and the overrides for the top-level
+        # namespace haven't been performed yet, we need to set properties on the vendored version
+        series_method = getattr(spd.series.Series, method_name, None)
+        if isinstance(series_method, property):
+            series_method = series_method.fget
+        if series_method is None or series_method is parent_method:
+            register_series_accessor(method_name)(base_method)
+        # TODO: SNOW-1063346
+        # Since we still use the vendored version of DataFrame and the overrides for the top-level
+        # namespace haven't been performed yet, we need to set properties on the vendored version
+        df_method = getattr(spd.dataframe.DataFrame, method_name, None)
+        if isinstance(df_method, property):
+            df_method = df_method.fget
+        if df_method is None or df_method is parent_method:
+            register_dataframe_accessor(method_name)(base_method)
+        # Replace base method
+        setattr(BasePandasDataset, method_name, base_method)
+        return base_method
+
+    return decorator


 def register_base_not_implemented():
@@ -303,3 +395,1901 @@ def truncate(
     @register_base_not_implemented()
     def __finalize__(self, other, method=None, **kwargs):
         pass  # pragma: no cover
+
+
+# === OVERRIDDEN METHODS ===
+# The below methods have their frontend implementations overridden compared to the version present
+# in base.py. This is usually for one of the following reasons:
+# 1. The underlying QC interface used differs from that of modin. Notably, this applies to aggregate
+#    and binary operations; further work is needed to refactor either our implementation or upstream
+#    modin's implementation.
+# 2. Modin performs extra validation steps that issue additional SQL queries. Some of these are
+#    already fixed on main; see https://github.com/modin-project/modin/issues/7340 for details.
+# 3. Upstream Modin defaults to pandas for some edge cases. Defaulting to pandas at the query compiler
+#    layer is acceptable because we can force the method to raise NotImplementedError, but if a method
+#    defaults at the frontend, Modin raises a warning and performs the operation by coercing the
+#    dataset to a native pandas object. Removing these is tracked by
+#    https://github.com/modin-project/modin/issues/7104
+# 4. Snowpark pandas uses different default arguments from modin. This occurs if some parameters are
+#    only partially supported (like `numeric_only=True` for `skew`), but this behavior should likewise
+#    be revisited.

+# `aggregate` for axis=1 is performed as a call to `BasePandasDataset.apply` in upstream Modin,
+# which is unacceptable for Snowpark pandas.
Upstream Modin should be changed to allow the query +# compiler or a different layer to control dispatch. +@register_base_override("aggregate") +def aggregate( + self, func: AggFuncType = None, axis: Axis | None = 0, *args: Any, **kwargs: Any +): + """ + Aggregate using one or more operations over the specified axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas import Series + + origin_axis = axis + axis = self._get_axis_number(axis) + + if axis == 1 and isinstance(self, Series): + raise ValueError(f"No axis named {origin_axis} for object type Series") + + if len(self._query_compiler.columns) == 0: + # native pandas raise error with message "no result", here we raise a more readable error. + raise ValueError("No column to aggregate on.") + + # If we are using named kwargs, then we do not clear the kwargs (need them in the QC for processing + # order, as well as formatting error messages.) + uses_named_kwargs = False + # If aggregate is called on a Series, named aggregations can be passed in via a dictionary + # to func. + if func is None or (is_dict_like(func) and not self._is_dataframe): + if axis == 1: + raise ValueError( + "`func` must not be `None` when `axis=1`. Named aggregations are not supported with `axis=1`." + ) + if func is not None: + # If named aggregations are passed in via a dictionary to func, then we + # ignore the kwargs. + if any(is_dict_like(value) for value in func.values()): + # We can only get to this codepath if self is a Series, and func is a dictionary. + # In this case, if any of the values of func are themselves dictionaries, we must raise + # a Specification Error, as that is what pandas does. + raise SpecificationError("nested renamer is not supported") + kwargs = func + func = extract_validate_and_try_convert_named_aggs_from_kwargs( + self, allow_duplication=False, axis=axis, **kwargs + ) + uses_named_kwargs = True + else: + func = validate_and_try_convert_agg_func_arg_func_to_str( + agg_func=func, + obj=self, + allow_duplication=False, + axis=axis, + ) + + # This is to stay consistent with pandas result format, when the func is single + # aggregation function in format of callable or str, reduce the result dimension to + # convert dataframe to series, or convert series to scalar. + # Note: When named aggregations are used, the result is not reduced, even if there + # is only a single function. + # needs_reduce_dimension cannot be True if we are using named aggregations, since + # the values for func in that case are either NamedTuples (AggFuncWithLabels) or + # lists of NamedTuples, both of which are list like. + need_reduce_dimension = ( + (callable(func) or isinstance(func, str)) + # A Series should be returned when a single scalar string/function aggregation function, or a + # dict of scalar string/functions is specified. In all other cases (including if the function + # is a 1-element list), the result is a DataFrame. + # + # The examples below have axis=1, but the same logic is applied for axis=0. 
+        # >>> df = pd.DataFrame({"a": [0, 1], "b": [2, 3]})
+        #
+        # single aggregation: return Series
+        # >>> df.agg("max", axis=1)
+        # 0    2
+        # 1    3
+        # dtype: int64
+        #
+        # list of aggregations: return DF
+        # >>> df.agg(["max"], axis=1)
+        #    max
+        # 0    2
+        # 1    3
+        #
+        # dict where all aggregations are strings: return Series
+        # >>> df.agg({1: "max", 0: "min"}, axis=1)
+        # 1    3
+        # 0    0
+        # dtype: int64
+        #
+        # dict where one element is a list: return DF
+        # >>> df.agg({1: "max", 0: ["min"]}, axis=1)
+        #    max  min
+        # 1  3.0  NaN
+        # 0  NaN  0.0
+        or (
+            is_dict_like(func)
+            and all(not is_list_like(value) for value in func.values())
+        )
+    )
+
+    # If func is a dict, pandas will not respect kwargs for each aggregation function, and
+    # we should drop them before passing them to the query compiler.
+    #
+    # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg("max", skipna=False, axis=1)
+    # 0    NaN
+    # 1    1.0
+    # dtype: float64
+    # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg(["max"], skipna=False, axis=1)
+    #    max
+    # 0  0.0
+    # 1  1.0
+    # >>> pd.DataFrame([[np.nan], [0]]).aggregate("count", skipna=True, axis=0)
+    # 0    1
+    # dtype: int8
+    # >>> pd.DataFrame([[np.nan], [0]]).count(skipna=True, axis=0)
+    # TypeError: got an unexpected keyword argument 'skipna'
+    if is_dict_like(func) and not uses_named_kwargs:
+        kwargs.clear()
+
+    result = self.__constructor__(
+        query_compiler=self._query_compiler.agg(
+            func=func,
+            axis=axis,
+            args=args,
+            kwargs=kwargs,
+        )
+    )
+
+    if need_reduce_dimension:
+        if self._is_dataframe:
+            result = Series(query_compiler=result._query_compiler)
+
+        if isinstance(result, Series):
+            # When func is just "quantile" with a scalar q, result has quantile value as name
+            q = kwargs.get("q", 0.5)
+            if func == "quantile" and is_scalar(q):
+                result.name = q
+            else:
+                result.name = None
+
+        # handle case for single scalar (same as result._reduce_dimension())
+        if isinstance(self, Series):
+            return result.to_pandas().squeeze()
+
+    return result
+
+
+# `agg` is an alias of `aggregate`.
+agg = aggregate
+register_base_override("agg")(agg)
+
+
+# `_agg_helper` is not defined in Modin and is used by Snowpark pandas to do extra validation.
+@register_base_override("_agg_helper")
+def _agg_helper(
+    self,
+    func: str,
+    skipna: bool = True,
+    axis: int | None | NoDefault = no_default,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    if not self._is_dataframe and numeric_only and not is_numeric_dtype(self.dtype):
+        # Series aggregations on non-numeric data do not support numeric_only:
+        # https://github.com/pandas-dev/pandas/blob/cece8c6579854f6b39b143e22c11cac56502c4fd/pandas/core/series.py#L6358
+        raise TypeError(
+            f"Series.{func} does not allow numeric_only=True with non-numeric dtypes."
+        )
+    axis = self._get_axis_number(axis)
+    numeric_only = validate_bool_kwarg(numeric_only, "numeric_only", none_allowed=True)
+    skipna = validate_bool_kwarg(skipna, "skipna", none_allowed=False)
+    agg_kwargs: dict[str, Any] = {
+        "numeric_only": numeric_only,
+        "skipna": skipna,
+    }
+    agg_kwargs.update(kwargs)
+    return self.aggregate(func=func, axis=axis, **agg_kwargs)
+
+
+# See _agg_helper
+@register_base_override("count")
+def count(
+    self,
+    axis: Axis | None = 0,
+    numeric_only: bool = False,
+):
+    """
+    Count non-NA cells for `BasePandasDataset`.
+    """
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    return self._agg_helper(
+        func="count",
+        axis=axis,
+        numeric_only=numeric_only,
+    )
+
+
+# See _agg_helper
+@register_base_override("max")
+def max(
+    self,
+    axis: Axis | None = 0,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    """
+    Return the maximum of the values over the requested axis.
+    """
+    return self._agg_helper(
+        func="max",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("min")
+def min(
+    self,
+    axis: Axis | None | NoDefault = no_default,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    **kwargs,
+):
+    """
+    Return the minimum of the values over the requested axis.
+    """
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    return self._agg_helper(
+        func="min",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("mean")
+def mean(
+    self,
+    axis: Axis | None | NoDefault = no_default,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    """
+    Return the mean of the values over the requested axis.
+    """
+    return self._agg_helper(
+        func="mean",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("median")
+def median(
+    self,
+    axis: Axis | None | NoDefault = no_default,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    """
+    Return the median of the values over the requested axis.
+    """
+    return self._agg_helper(
+        func="median",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("std")
+def std(
+    self,
+    axis: Axis | None = None,
+    skipna: bool = True,
+    ddof: int = 1,
+    numeric_only: bool = False,
+    **kwargs,
+):
+    """
+    Return sample standard deviation over requested axis.
+    """
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    kwargs.update({"ddof": ddof})
+    return self._agg_helper(
+        func="std",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("sum")
+def sum(
+    self,
+    axis: Axis | None = None,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    min_count: int = 0,
+    **kwargs: Any,
+):
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    min_count = validate_int_kwarg(min_count, "min_count")
+    kwargs.update({"min_count": min_count})
+    return self._agg_helper(
+        func="sum",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("var")
+def var(
+    self,
+    axis: Axis | None = None,
+    skipna: bool = True,
+    ddof: int = 1,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    """
+    Return unbiased variance over requested axis.
+    """
+    kwargs.update({"ddof": ddof})
+    return self._agg_helper(
+        func="var",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# Modin does not provide `MultiIndex` support and will default to pandas when `level` is specified,
+# and allows binary ops against native pandas objects that Snowpark pandas prohibits.
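+# (Editorial note, not part of the patch: e.g. `pd.Series([1]) + pandas.Series([1])`
+# with a native pandas right-hand operand is rejected by raise_if_native_pandas_objects
+# in the override below, whereas upstream Modin would accept the native object.)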
+@register_base_override("_binary_op")
+def _binary_op(
+    self,
+    op: str,
+    other: BasePandasDataset,
+    axis: Axis = None,
+    level: Level | None = None,
+    fill_value: float | None = None,
+    **kwargs: Any,
+):
+    """
+    Do a binary operation between two datasets.
+
+    Parameters
+    ----------
+    op : str
+        Name of the binary operation.
+    other : modin.pandas.BasePandasDataset
+        Second operand of the binary operation.
+    axis: Whether to compare by the index (0 or ‘index’) or columns (1 or ‘columns’).
+    level: Broadcast across a level, matching Index values on the passed MultiIndex level.
+    fill_value: Fill existing missing (NaN) values, and any new element needed for
+        successful DataFrame alignment, with this value before computation.
+        If data in both corresponding DataFrame locations is missing the result will be missing.
+        Only arithmetic binary operations have this parameter (e.g., add() does, but eq() doesn't).
+
+    kwargs can contain the following parameters passed in at the frontend:
+        func: Only used for the `combine` method. Function that takes two series as inputs and
+            returns a Series or a scalar. Used to merge the two dataframes column by column.
+
+    Returns
+    -------
+    modin.pandas.BasePandasDataset
+        Result of the binary operation.
+    """
+    # In upstream modin, _axis indicates the operator will use the default axis
+    if kwargs.pop("_axis", None) is None:
+        if axis is not None:
+            axis = self._get_axis_number(axis)
+        else:
+            axis = 1
+    else:
+        axis = 0
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    raise_if_native_pandas_objects(other)
+    axis = self._get_axis_number(axis)
+    squeeze_self = isinstance(self, pd.Series)
+
+    # pandas itself will ignore the axis argument when using a Series.
+    # By default, it is set to axis=0. However, for the case of a Series interacting with
+    # a DataFrame the behavior is axis=1. Manually check here for this case and adjust the axis.
+
+    is_lhs_series_and_rhs_dataframe = isinstance(self, pd.Series) and isinstance(
+        other, pd.DataFrame
+    )
+
+    new_query_compiler = self._query_compiler.binary_op(
+        op=op,
+        other=other,
+        axis=1 if is_lhs_series_and_rhs_dataframe else axis,
+        level=level,
+        fill_value=fill_value,
+        squeeze_self=squeeze_self,
+        **kwargs,
+    )
+
+    from snowflake.snowpark.modin.pandas.dataframe import DataFrame
+
+    # Modin Bug: https://github.com/modin-project/modin/issues/7236
+    # For a Series interacting with a DataFrame, always return a DataFrame
+    return (
+        DataFrame(query_compiler=new_query_compiler)
+        if is_lhs_series_and_rhs_dataframe
+        else self._create_or_update_from_compiler(new_query_compiler)
+    )
+
+
+# Current Modin does not use _dropna and instead defines `dropna` directly, but Snowpark pandas
+# Series/DF still do. Snowpark pandas still needs to add support for the `ignore_index` parameter
+# (added in pandas 2.0), and should be able to refactor to remove this override.
+@register_base_override("_dropna")
+def _dropna(
+    self,
+    axis: Axis = 0,
+    how: str | NoDefault = no_default,
+    thresh: int | NoDefault = no_default,
+    subset: IndexLabel = None,
+    inplace: bool = False,
+):
+    inplace = validate_bool_kwarg(inplace, "inplace")
+
+    if is_list_like(axis):
+        raise TypeError("supplying multiple axes to axis is no longer supported.")
+
+    axis = self._get_axis_number(axis)
+
+    if (how is not no_default) and (thresh is not no_default):
+        raise TypeError(
+            "You cannot set both the how and thresh arguments at the same time."
+ ) + + if how is no_default: + how = "any" + if how not in ["any", "all"]: + raise ValueError("invalid how option: %s" % how) + if subset is not None: + if axis == 1: + indices = self.index.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + else: + indices = self.columns.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + + new_query_compiler = self._query_compiler.dropna( + axis=axis, + how=how, + thresh=thresh, + subset=subset, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + +# Snowpark pandas uses `self_is_series` instead of `squeeze_self` and `squeeze_value` to determine +# the shape of `self` and `value`. Further work is needed to reconcile these two approaches. +@register_base_override("fillna") +def fillna( + self, + self_is_series, + value: Hashable | Mapping | pd.Series | pd.DataFrame = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, +): + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + self_is_series : bool + If True then self contains a Series object, if False then self contains + a DataFrame object. + value : scalar, dict, Series, or DataFrame, default: None + Value to use to fill holes (e.g. 0), alternately a + dict/Series/DataFrame of values specifying which value to use for + each index (for a Series) or column (for a DataFrame). Values not + in the dict/Series/DataFrame will not be filled. This value cannot + be a list. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default: None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use next valid observation to fill gap. + axis : {None, 0, 1}, default: None + Axis along which to fill missing values. + inplace : bool, default: False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a + DataFrame). + limit : int, default: None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + downcast : dict, default: None + A dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate + equal type (e.g. float64 to int64 if possible). + + Returns + ------- + Series, DataFrame or None + Object with missing values filled or None if ``inplace=True``. 
+    """
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    raise_if_native_pandas_objects(value)
+    inplace = validate_bool_kwarg(inplace, "inplace")
+    axis = self._get_axis_number(axis)
+    if isinstance(value, (list, tuple)):
+        raise TypeError(
+            '"value" parameter must be a scalar or dict, but '
+            + f'you passed a "{type(value).__name__}"'
+        )
+    if value is None and method is None:
+        # same as pandas
+        raise ValueError("Must specify a fill 'value' or 'method'.")
+    if value is not None and method is not None:
+        raise ValueError("Cannot specify both 'value' and 'method'.")
+    if method is not None and method not in ["backfill", "bfill", "pad", "ffill"]:
+        expecting = "pad (ffill) or backfill (bfill)"
+        msg = "Invalid fill method. Expecting {expecting}. Got {method}".format(
+            expecting=expecting, method=method
+        )
+        raise ValueError(msg)
+    if limit is not None:
+        if not isinstance(limit, int):
+            raise ValueError("Limit must be an integer")
+        elif limit <= 0:
+            raise ValueError("Limit must be greater than 0")
+
+    new_query_compiler = self._query_compiler.fillna(
+        self_is_series=self_is_series,
+        value=value,
+        method=method,
+        axis=axis,
+        limit=limit,
+        downcast=downcast,
+    )
+    return self._create_or_update_from_compiler(new_query_compiler, inplace)
+
+
+# Snowpark pandas passes the query compiler object from a BasePandasDataset, which Modin does not do.
+@register_base_override("isin")
+def isin(
+    self, values: BasePandasDataset | ListLike | dict[Hashable, ListLike]
+) -> BasePandasDataset:  # noqa: PR01, RT01, D200
+    """
+    Whether elements in `BasePandasDataset` are contained in `values`.
+    """
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+
+    # Pass values as a query compiler if it is a BasePandasDataset.
+    if isinstance(values, BasePandasDataset):
+        values = values._query_compiler
+
+    # Otherwise, convert non-dict values to a list unless they are already a list or a
+    # numpy array: for the non-lazy case, where values is not a BasePandasDataset,
+    # SnowflakeQueryCompiler expects the data to be materialized as a list or numpy
+    # array, and because numpy may perform implicit type conversions we prefer a list
+    # as the more general container.
+    elif not isinstance(values, dict) and not isinstance(values, (list, np.ndarray)):
+        values = list(values)
+
+    return self.__constructor__(query_compiler=self._query_compiler.isin(values=values))
+
+
+# Snowpark pandas uses the single `quantiles_along_axis0` query compiler method, while upstream
+# Modin splits this into `quantile_for_single_value` and `quantile_for_list_of_values` calls.
+# It should be possible to merge those two functions upstream and reconcile the implementations.
+@register_base_override("quantile")
+def quantile(
+    self,
+    q: Scalar | ListLike = 0.5,
+    axis: Axis = 0,
+    numeric_only: bool = False,
+    interpolation: Literal[
+        "linear", "lower", "higher", "midpoint", "nearest"
+    ] = "linear",
+    method: Literal["single", "table"] = "single",
+) -> float | BasePandasDataset:
+    """
+    Return values at the given quantile over requested axis.
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + + # TODO + # - SNOW-1008361: support axis=1 + # - SNOW-1008367: support when q is Snowpandas DF/Series (need to require QC interface to accept QC q values) + # - SNOW-1003587: support datetime/timedelta columns + + if axis == 1 or interpolation not in ["linear", "nearest"] or method != "single": + ErrorMessage.not_implemented( + f"quantile function with parameters axis={axis}, interpolation={interpolation}, method={method} not supported" + ) + + if not numeric_only: + # If not numeric_only and columns, then check all columns are either + # numeric, timestamp, or timedelta + # Check if dtype is numeric, timedelta ("m"), or datetime ("M") + if not axis and not all( + is_numeric_dtype(t) or lib.is_np_dtype(t, "mM") for t in self._get_dtypes() + ): + raise TypeError("can't multiply sequence by non-int of type 'float'") + # If over rows, then make sure that all dtypes are equal for not + # numeric_only + elif axis: + for i in range(1, len(self._get_dtypes())): + pre_dtype = self._get_dtypes()[i - 1] + curr_dtype = self._get_dtypes()[i] + if not is_dtype_equal(pre_dtype, curr_dtype): + raise TypeError( + "Cannot compare type '{}' with type '{}'".format( + pre_dtype, curr_dtype + ) + ) + else: + # Normally pandas returns this near the end of the quantile, but we + # can't afford the overhead of running the entire operation before + # we error. + if not any(is_numeric_dtype(t) for t in self._get_dtypes()): + raise ValueError("need at least one array to concatenate") + + # check that all qs are between 0 and 1 + validate_percentile(q) + axis = self._get_axis_number(axis) + query_compiler = self._query_compiler.quantiles_along_axis0( + q=q if is_list_like(q) else [q], + numeric_only=numeric_only, + interpolation=interpolation, + method=method, + ) + if is_list_like(q): + return self.__constructor__(query_compiler=query_compiler) + else: + # result is either a scalar or Series + result = self._reduce_dimension(query_compiler.transpose_single_row()) + if isinstance(result, BasePandasDataset): + result.name = q + return result + + +# Current Modin does not define this method. Snowpark pandas currently only uses it in +# `DataFrame.set_index`. Modin does not support MultiIndex, or have its own lazy index class, +# so we may need to keep this method for the foreseeable future. +@register_base_override("_to_series_list") +def _to_series_list(self, index: pd.Index) -> list[pd.Series]: + """ + Convert index to a list of series + Args: + index: can be single or multi index + + Returns: + the list of series + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if isinstance(index, pd.MultiIndex): + return [ + pd.Series(index.get_level_values(level)) for level in range(index.nlevels) + ] + elif isinstance(index, pd.Index): + return [pd.Series(index)] + else: + raise Exception("invalid index: " + str(index)) + + +# Upstream modin defaults to pandas when `suffix` is provided. +@register_base_override("shift") +def shift( + self, + periods: int | Sequence[int] = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = no_default, + suffix: str | None = None, +) -> BasePandasDataset: + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if periods == 0 and freq is None: + # Check obvious case first, freq manipulates the index even for periods == 0 so check for it in addition. 
+ return self.copy() + + # pandas compatible ValueError for freq='infer' + # TODO: Test as part of SNOW-1023324. + if freq == "infer": # pragma: no cover + if not hasattr(self, "freq") and not hasattr( # pragma: no cover + self, "inferred_freq" # pragma: no cover + ): # pragma: no cover + raise ValueError() # pragma: no cover + + axis = self._get_axis_number(axis) + + if fill_value == no_default: + fill_value = None + + new_query_compiler = self._query_compiler.shift( + periods, freq, axis, fill_value, suffix + ) + return self._create_or_update_from_compiler(new_query_compiler, False) + + +# Snowpark pandas supports only `numeric_only=True`, which is not the default value of the argument, +# so we have this overridden. We should revisit this behavior. +@register_base_override("skew") +def skew( + self, + axis: Axis | None | NoDefault = no_default, + skipna: bool = True, + numeric_only=True, + **kwargs, +): # noqa: PR01, RT01, D200 + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + """ + Return unbiased skew over requested axis. + """ + return self._stat_operation("skew", axis, skipna, numeric_only, **kwargs) + + +@register_base_override("resample") +def resample( + self, + rule, + axis: Axis = lib.no_default, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + on: Level = None, + level: Level = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + group_keys=no_default, +): # noqa: PR01, RT01, D200 + """ + Resample time-series data. + """ + from snowflake.snowpark.modin.pandas.resample import Resampler + + if axis is not lib.no_default: # pragma: no cover + axis = self._get_axis_number(axis) + if axis == 1: + warnings.warn( + "DataFrame.resample with axis=1 is deprecated. Do " + + "`frame.T.resample(...)` without axis instead.", + FutureWarning, + stacklevel=1, + ) + else: + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.resample is " + + "deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + return Resampler( + dataframe=self, + rule=rule, + axis=axis, + closed=closed, + label=label, + convention=convention, + kind=kind, + on=on, + level=level, + origin=origin, + offset=offset, + group_keys=group_keys, + ) + + +# Snowpark pandas needs to return a custom Expanding window object. We cannot use the +# extensions module for this at the moment because modin performs a relative import of +# `from .window import Expanding`. +@register_base_override("expanding") +def expanding(self, min_periods=1, axis=0, method="single"): # noqa: PR01, RT01, D200 + """ + Provide expanding window calculations. + """ + from snowflake.snowpark.modin.pandas.window import Expanding + + if axis is not lib.no_default: + axis = self._get_axis_number(axis) + name = "expanding" + if axis == 1: + warnings.warn( + f"Support for axis=1 in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + f"Use obj.T.{name}(...) instead", + FutureWarning, + stacklevel=1, + ) + else: + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. 
" + + "Call the method without the axis keyword instead.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + return Expanding( + self, + min_periods=min_periods, + axis=axis, + method=method, + ) + + +# Same as Expanding: Snowpark pandas needs to return a custmo Window object. +@register_base_override("rolling") +def rolling( + self, + window, + min_periods: int | None = None, + center: bool = False, + win_type: str | None = None, + on: str | None = None, + axis: Axis = lib.no_default, + closed: str | None = None, + step: int | None = None, + method: str = "single", +): # noqa: PR01, RT01, D200 + """ + Provide rolling window calculations. + """ + if axis is not lib.no_default: + axis = self._get_axis_number(axis) + name = "rolling" + if axis == 1: + warnings.warn( + f"Support for axis=1 in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + f"Use obj.T.{name}(...) instead", + FutureWarning, + stacklevel=1, + ) + else: # pragma: no cover + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + "Call the method without the axis keyword instead.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + if win_type is not None: + from snowflake.snowpark.modin.pandas.window import Window + + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + from snowflake.snowpark.modin.pandas.window import Rolling + + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("iloc") +@property +def iloc(self): + """ + Purely integer-location based indexing for selection by position. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: SNOW-930028 enable all skipped doctests + from snowflake.snowpark.modin.pandas.indexing import _iLocIndexer + + return _iLocIndexer(self) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("loc") +@property +def loc(self): + """ + Get a group of rows and columns by label(s) or a boolean array. + """ + # TODO: SNOW-935444 fix doctest where index key has name + # TODO: SNOW-933782 fix multiindex transpose bug, e.g., Name: (cobra, mark ii) => Name: ('cobra', 'mark ii') + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas.indexing import _LocIndexer + + return _LocIndexer(self) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("iat") +@property +def iat(self, axis=None): # noqa: PR01, RT01, D200 + """ + Get a single value for a row/column pair by integer position. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas.indexing import _iAtIndexer + + return _iAtIndexer(self) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("at") +@property +def at(self, axis=None): # noqa: PR01, RT01, D200 + """ + Get a single value for a row/column label pair. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas.indexing import _AtIndexer + + return _AtIndexer(self) + + +# Snowpark pandas performs different dispatch logic; some changes may need to be upstreamed +# to fix edge case indexing behaviors. +@register_base_override("__getitem__") +def __getitem__(self, key): + """ + Retrieve dataset according to `key`. + + Parameters + ---------- + key : callable, scalar, slice, str or tuple + The global row index to retrieve data from. + + Returns + ------- + BasePandasDataset + Located dataset. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + key = apply_if_callable(key, self) + # If a slice is passed in, use .iloc[key]. + if isinstance(key, slice): + if (is_integer(key.start) or key.start is None) and ( + is_integer(key.stop) or key.stop is None + ): + return self.iloc[key] + else: + return self.loc[key] + + # If the object calling getitem is a Series, only use .loc[key] to filter index. + if isinstance(self, pd.Series): + return self.loc[key] + + # Sometimes the result of a callable is a DataFrame (e.g. df[df > 0]) - use where. + elif isinstance(key, pd.DataFrame): + return self.where(cond=key) + + # If the object is a boolean list-like object, use .loc[key] to filter index. + # The if statement is structured this way to avoid calling dtype and reduce query count. + if isinstance(key, pd.Series): + if pandas.api.types.is_bool_dtype(key.dtype): + return self.loc[key] + elif is_list_like(key): + if hasattr(key, "dtype"): + if pandas.api.types.is_bool_dtype(key.dtype): + return self.loc[key] + if (all(is_bool(k) for k in key)) and len(key) > 0: + return self.loc[key] + + # In all other cases, use .loc[:, key] to filter columns. + return self.loc[:, key] + + +# Snowpark pandas does extra argument validation, which may need to be upstreamed. +@register_base_override("sort_values") +def sort_values( + self, + by, + axis=0, + ascending=True, + inplace: bool = False, + kind="quicksort", + na_position="last", + ignore_index: bool = False, + key: IndexKeyFunc | None = None, +): # noqa: PR01, RT01, D200 + """ + Sort by the values along either axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + inplace = validate_bool_kwarg(inplace, "inplace") + ascending = validate_ascending(ascending) + if axis == 0: + # If any column is None raise KeyError (same a native pandas). + if by is None or (isinstance(by, list) and None in by): + # Same error message as native pandas. + raise KeyError(None) + if not isinstance(by, list): + by = [by] + + # Convert 'ascending' to sequence if needed. + if not isinstance(ascending, Sequence): + ascending = [ascending] * len(by) + if len(by) != len(ascending): + # Same error message as native pandas. + raise ValueError( + f"Length of ascending ({len(ascending)})" + f" != length of by ({len(by)})" + ) + + columns = self._query_compiler.columns.values.tolist() + index_names = self._query_compiler.get_index_names() + for by_col in by: + col_count = columns.count(by_col) + index_count = index_names.count(by_col) + if col_count == 0 and index_count == 0: + # Same error message as native pandas. + raise KeyError(by_col) + if col_count and index_count: + # Same error message as native pandas. + raise ValueError( + f"'{by_col}' is both an index level and a column label, which is ambiguous." + ) + if col_count > 1: + # Same error message as native pandas. 
+ raise ValueError(f"The column label '{by_col}' is not unique.") + + if na_position not in get_args(NaPosition): + # Same error message as native pandas for invalid 'na_position' value. + raise ValueError(f"invalid na_position: {na_position}") + result = self._query_compiler.sort_rows_by_column_values( + by, + ascending=ascending, + kind=kind, + na_position=na_position, + ignore_index=ignore_index, + key=key, + ) + else: + result = self._query_compiler.sort_columns_by_row_values( + by, + ascending=ascending, + kind=kind, + na_position=na_position, + ignore_index=ignore_index, + key=key, + ) + return self._create_or_update_from_compiler(result, inplace) + + +# Modin does not define `where` on BasePandasDataset, and defaults to pandas at the frontend +# layer for Series. +@register_base_override("where") +def where( + self, + cond: BasePandasDataset | Callable | AnyArrayLike, + other: BasePandasDataset | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, +): + """ + Replace values where the condition is False. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: SNOW-985670: Refactor `where` and `mask` + # will move pre-processing to QC layer. + inplace = validate_bool_kwarg(inplace, "inplace") + if cond is None: + raise ValueError("Array conditional must be same shape as self") + + cond = apply_if_callable(cond, self) + + if isinstance(cond, Callable): + raise NotImplementedError("Do not support callable for 'cond' parameter.") + + from snowflake.snowpark.modin.pandas import Series + + if isinstance(cond, Series): + cond._query_compiler._shape_hint = "column" + if isinstance(self, Series): + self._query_compiler._shape_hint = "column" + if isinstance(other, Series): + other._query_compiler._shape_hint = "column" + + if not isinstance(cond, BasePandasDataset): + cond = get_as_shape_compatible_dataframe_or_series(cond, self) + cond._query_compiler._shape_hint = "array" + + if other is not None: + other = apply_if_callable(other, self) + + if isinstance(other, np.ndarray): + other = get_as_shape_compatible_dataframe_or_series( + other, + self, + shape_mismatch_message="other must be the same shape as self when an ndarray", + ) + other._query_compiler._shape_hint = "array" + + if isinstance(other, BasePandasDataset): + other = other._query_compiler + + query_compiler = self._query_compiler.where( + cond._query_compiler, + other, + axis, + level, + ) + + return self._create_or_update_from_compiler(query_compiler, inplace) + + +# Snowpark pandas performs extra argument validation, some of which should be pushed down +# to the QC layer. +@register_base_override("mask") +def mask( + self, + cond: BasePandasDataset | Callable | AnyArrayLike, + other: BasePandasDataset | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, +): + """ + Replace values where the condition is True. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: https://snowflakecomputing.atlassian.net/browse/SNOW-985670 + # will move pre-processing to QC layer. 
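+ # Illustrative sketch (hypothetical frame): df.mask(df > 0, other=-1) replaces every + # positive value with -1 and keeps the rest, i.e. the complement of `where`, which + # replaces values where the condition is False.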
+ inplace = validate_bool_kwarg(inplace, "inplace") + if cond is None: + raise ValueError("Array conditional must be same shape as self") + + cond = apply_if_callable(cond, self) + + if isinstance(cond, Callable): + raise NotImplementedError("Do not support callable for 'cond' parameter.") + + from snowflake.snowpark.modin.pandas import Series + + if isinstance(cond, Series): + cond._query_compiler._shape_hint = "column" + if isinstance(self, Series): + self._query_compiler._shape_hint = "column" + if isinstance(other, Series): + other._query_compiler._shape_hint = "column" + + if not isinstance(cond, BasePandasDataset): + cond = get_as_shape_compatible_dataframe_or_series(cond, self) + cond._query_compiler._shape_hint = "array" + + if other is not None: + other = apply_if_callable(other, self) + + if isinstance(other, np.ndarray): + other = get_as_shape_compatible_dataframe_or_series( + other, + self, + shape_mismatch_message="other must be the same shape as self when an ndarray", + ) + other._query_compiler._shape_hint = "array" + + if isinstance(other, BasePandasDataset): + other = other._query_compiler + + query_compiler = self._query_compiler.mask( + cond._query_compiler, + other, + axis, + level, + ) + + return self._create_or_update_from_compiler(query_compiler, inplace) + + +# Snowpark pandas uses a custom I/O dispatcher class. +@register_base_override("to_csv") +def to_csv( + self, + path_or_buf=None, + sep=",", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + mode="w", + encoding=None, + compression="infer", + quoting=None, + quotechar='"', + lineterminator=None, + chunksize=None, + date_format=None, + doublequote=True, + escapechar=None, + decimal=".", + errors: str = "strict", + storage_options: StorageOptions = None, +): # pragma: no cover + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return FactoryDispatcher.to_csv( + self._query_compiler, + path_or_buf=path_or_buf, + sep=sep, + na_rep=na_rep, + float_format=float_format, + columns=columns, + header=header, + index=index, + index_label=index_label, + mode=mode, + encoding=encoding, + compression=compression, + quoting=quoting, + quotechar=quotechar, + lineterminator=lineterminator, + chunksize=chunksize, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + decimal=decimal, + errors=errors, + storage_options=storage_options, + ) + + +# Modin performs extra argument validation and defaults to pandas for some edge cases. +@register_base_override("sample") +def sample( + self, + n: int | None = None, + frac: float | None = None, + replace: bool = False, + weights: str | np.ndarray | None = None, + random_state: RandomState | None = None, + axis: Axis | None = None, + ignore_index: bool = False, +): + """ + Return a random sample of items from an axis of object. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if self._get_axis_number(axis): + if weights is not None and isinstance(weights, str): + raise ValueError( + "Strings can only be passed to weights when sampling from rows on a DataFrame" + ) + else: + if n is None and frac is None: + n = 1 + elif n is not None and frac is not None: + raise ValueError("Please enter a value for `frac` OR `n`, not both") + else: + if n is not None: + if n < 0: + raise ValueError( + "A negative number of rows requested. Please provide `n` >= 0." 
+ ) + if n % 1 != 0: + raise ValueError("Only integers accepted as `n` values") + else: + if frac < 0: + raise ValueError( + "A negative number of rows requested. Please provide `frac` >= 0." + ) + + query_compiler = self._query_compiler.sample( + n, frac, replace, weights, random_state, axis, ignore_index + ) + return self.__constructor__(query_compiler=query_compiler) + + +# Modin performs an extra query calling self.isna() to raise a warning when fill_method is unspecified. +@register_base_override("pct_change") +def pct_change( + self, periods=1, fill_method=no_default, limit=no_default, freq=None, **kwargs +): # noqa: PR01, RT01, D200 + """ + Percentage change between the current and a prior element. + """ + if fill_method not in (lib.no_default, None) or limit is not lib.no_default: + warnings.warn( + "The 'fill_method' keyword being not None and the 'limit' keyword in " + + f"{type(self).__name__}.pct_change are deprecated and will be removed " + + "in a future version. Either fill in any non-leading NA values prior " + + "to calling pct_change or specify 'fill_method=None' to not fill NA " + + "values.", + FutureWarning, + stacklevel=1, + ) + if fill_method is lib.no_default: + warnings.warn( + f"The default fill_method='pad' in {type(self).__name__}.pct_change is " + + "deprecated and will be removed in a future version. Either fill in any " + + "non-leading NA values prior to calling pct_change or specify 'fill_method=None' " + + "to not fill NA values.", + FutureWarning, + stacklevel=1, + ) + fill_method = "pad" + + if limit is lib.no_default: + limit = None + + if "axis" in kwargs: + kwargs["axis"] = self._get_axis_number(kwargs["axis"]) + + # Attempting to match pandas error behavior here + if not isinstance(periods, int): + raise TypeError(f"periods must be an int. got {type(periods)} instead") + + # Attempting to match pandas error behavior here + for dtype in self._get_dtypes(): + if not is_numeric_dtype(dtype): + raise TypeError( + f"cannot perform pct_change on non-numeric column with dtype {dtype}" + ) + + return self.__constructor__( + query_compiler=self._query_compiler.pct_change( + periods=periods, + fill_method=fill_method, + limit=limit, + freq=freq, + **kwargs, + ) + ) + + +# Snowpark pandas has different `copy` behavior, and some different behavior with native series arguments. +@register_base_override("astype") +def astype( + self, + dtype: str | type | pd.Series | dict[str, type], + copy: bool = True, + errors: Literal["raise", "ignore"] = "raise", +) -> pd.DataFrame | pd.Series: + """ + Cast a Modin object to a specified dtype `dtype`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # dtype can be a series, a dict, or a scalar. If it's series or scalar, + # convert it to a dict before passing it to the query compiler. + raise_if_native_pandas_objects(dtype) + from snowflake.snowpark.modin.pandas import Series + + if isinstance(dtype, Series): + dtype = dtype.to_pandas() + if not dtype.index.is_unique: + raise ValueError( + "The new Series of types must have a unique index, i.e. " + + "it must be a one-to-one mapping from column names to " + + "their new dtypes." + ) + dtype = dtype.to_dict() + # If we got a series or dict originally, dtype is a dict now. Its keys + # must be column names. + if isinstance(dtype, dict): + # Avoid materializing columns. The query compiler will handle errors where + # dtype dict includes keys that are not in columns.
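+ # Illustrative sketch (hypothetical frame): df.astype({"A": "float64"}) casts only + # column "A" and leaves the dtypes of all other columns unchanged.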
+ col_dtypes = dtype + for col_name in col_dtypes: + if col_name not in self._query_compiler.columns: + raise KeyError( + "Only a column name can be used for the key in a dtype mappings argument. " + f"'{col_name}' not found in columns." + ) + else: + # Assume that the dtype is a scalar. + col_dtypes = {column: dtype for column in self._query_compiler.columns} + + # ensure values are pandas dtypes + col_dtypes = {k: pandas_dtype(v) for k, v in col_dtypes.items()} + new_query_compiler = self._query_compiler.astype(col_dtypes, errors=errors) + return self._create_or_update_from_compiler(new_query_compiler, not copy) + + +# Modin defaults to pandas when `level` is specified, and has some extra axis validation that +# is guarded in newer versions. +@register_base_override("drop") +def drop( + self, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level = None, + inplace: bool = False, + errors: IgnoreRaise = "raise", +) -> BasePandasDataset | None: + """ + Drop specified labels from `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + inplace = validate_bool_kwarg(inplace, "inplace") + if labels is not None: + if index is not None or columns is not None: + raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") + axes = {self._get_axis_number(axis): labels} + elif index is not None or columns is not None: + axes = {0: index, 1: columns} + else: + raise ValueError( + "Need to specify at least one of 'labels', 'index' or 'columns'" + ) + + for axis, labels in axes.items(): + if labels is not None: + if level is not None and not self._query_compiler.has_multiindex(axis=axis): + # Same error as native pandas. + raise AssertionError("axis must be a MultiIndex") + # According to pandas documentation, a tuple will be used as a single + # label and not treated as a list-like. + if not is_list_like(labels) or isinstance(labels, tuple): + axes[axis] = [labels] + + new_query_compiler = self._query_compiler.drop( + index=axes.get(0), columns=axes.get(1), level=level, errors=errors + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + +# Modin calls len(self.index) instead of a direct query compiler method. +@register_base_override("__len__") +def __len__(self) -> int: + """ + Return length of info axis. + + Returns + ------- + int + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._query_compiler.get_axis_len(axis=0) + + +# Snowpark pandas ignores `copy`. +@register_base_override("set_axis") +def set_axis( + self, + labels: IndexLabel, + *, + axis: Axis = 0, + copy: bool | NoDefault = no_default, +): + """ + Assign desired index to given axis. + """ + # Behavior based on copy: + # ----------------------------------- + # - In native pandas, copy determines whether to create a copy of the data (not DataFrame). + # - We cannot emulate the native pandas' copy behavior in Snowpark since a copy of only data + # cannot be created -- you can only copy the whole object (DataFrame/Series). + # + # Snowpark behavior: + # ------------------ + # - copy is kept for compatibility with native pandas but is ignored. The user is warned that copy is unused. + # Warn user that copy does not do anything. + if copy is not no_default: + WarningMessage.single_warning( + message=f"{type(self).__name__}.set_axis 'copy' keyword is unused and is ignored."
+ ) + if labels is None: + raise TypeError("None is not a valid value for the parameter 'labels'.") + + # Apply the new labels to a copy of this object and return it. + obj = self.copy() + setattr(obj, axis, labels) + return obj + + +# Modin has different behavior for empty dataframes and some slightly different length validation. +@register_base_override("describe") +def describe( + self, + percentiles: ListLike | None = None, + include: ListLike | Literal["all"] | None = None, + exclude: ListLike | None = None, +) -> BasePandasDataset: + """ + Generate descriptive statistics. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + percentiles = _refine_percentiles(percentiles) + data = self + if self._is_dataframe: + # Upstream modin lacks this check because it defaults to pandas for describing empty dataframes + if len(self.columns) == 0: + raise ValueError("Cannot describe a DataFrame without columns") + + # include/exclude are ignored for Series + if (include is None) and (exclude is None): + # when some numerics are found, keep only numerics + default_include: list[npt.DTypeLike] = [np.number] + default_include.append("datetime") + data = self.select_dtypes(include=default_include) + if len(data.columns) == 0: + data = self + elif include == "all": + if exclude is not None: + raise ValueError("exclude must be None when include is 'all'") + data = self + else: + data = self.select_dtypes( + include=include, + exclude=exclude, + ) + # Upstream modin uses data.empty, but that incurs an extra row count query + if self._is_dataframe and len(data.columns) == 0: + # Match pandas error from concatenating empty list of series descriptions. + raise ValueError("No objects to concatenate") + + return self.__constructor__( + query_compiler=data._query_compiler.describe(percentiles=percentiles) + ) + + +# Modin does type validation on self that Snowpark pandas defers to SQL. +@register_base_override("diff") +def diff(self, periods: int = 1, axis: Axis = 0): + """ + First discrete difference of element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # We must only accept integer (or float values that are whole numbers) + # for periods. + int_periods = validate_int_kwarg(periods, "periods", float_allowed=True) + axis = self._get_axis_number(axis) + return self.__constructor__( + query_compiler=self._query_compiler.diff(axis=axis, periods=int_periods) + ) + + +# Modin does an unnecessary len call when n == 0. +@register_base_override("tail") +def tail(self, n: int = 5): + if n == 0: + return self.iloc[0:0] + return self.iloc[-n:] + + +# Snowpark pandas does extra argument validation (which should probably be deferred to SQL instead). +@register_base_override("idxmax") +def idxmax(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + """ + Return index of first occurrence of maximum over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + dtypes = self._get_dtypes() + if ( + axis == 1 + and not numeric_only + and any(not is_numeric_dtype(d) for d in dtypes) + and len(set(dtypes)) > 1 + ): + # For numeric_only=False, if we have any non-numeric dtype, e.g. + # a string type, we need every other column to be of the same type. + # We can't compare two objects of different non-numeric types, e.g. + # a string and a timestamp. + # If we have only numeric data, we can compare columns even if they + # have different types, e.g. we can compare an int column to a float + # column.
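+ # For example (hypothetical frame), a string column and a timestamp column cannot be + # compared row-wise, so we raise the same TypeError native pandas does.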
+ raise TypeError("'>' not supported for these dtypes") + axis = self._get_axis_number(axis) + return self._reduce_dimension( + self._query_compiler.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only) + ) + + +# Snowpark pandas does extra argument validation (which should probably be deferred to SQL instead). +@register_base_override("idxmin") +def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + """ + Return index of first occurrence of minimum over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + dtypes = self._get_dtypes() + if ( + axis == 1 + and not numeric_only + and any(not is_numeric_dtype(d) for d in dtypes) + and len(set(dtypes)) > 1 + ): + # For numeric_only=False, if we have any non-numeric dtype, e.g. + # a string type, we need every other column to be of the same type. + # We can't compare two objects of different non-numeric types, e.g. + # a string and a timestamp. + # If we have only numeric data, we can compare columns even if they + # have different types, e.g. we can compare an int column to a float + # column. + raise TypeError("'<' not supported for these dtypes") + axis = self._get_axis_number(axis) + return self._reduce_dimension( + self._query_compiler.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only) + ) + + +# Modin does dtype validation on unary ops that Snowpark pandas does not. +@register_base_override("__abs__") +def abs(self): # noqa: RT01, D200 + """ + Return a `BasePandasDataset` with absolute numeric value of each element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.abs()) + + +# Modin does dtype validation on unary ops that Snowpark pandas does not. +@register_base_override("__invert__") +def __invert__(self): + """ + Apply bitwise inverse to each element of the `BasePandasDataset`. + + Returns + ------- + BasePandasDataset + New BasePandasDataset containing bitwise inverse to each value. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.invert()) + + +# Modin does dtype validation on unary ops that Snowpark pandas does not. +@register_base_override("__neg__") +def __neg__(self): + """ + Change the sign for every value of self. + + Returns + ------- + BasePandasDataset + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.negative()) + + +# Modin needs to add a check for mapper is not None, which changes query counts in test_concat.py +# if not present. +@register_base_override("rename_axis") +def rename_axis( + self, + mapper=lib.no_default, + *, + index=lib.no_default, + columns=lib.no_default, + axis=0, + copy=None, + inplace=False, +): # noqa: PR01, RT01, D200 + """ + Set the name of the axis for the index or columns.
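+ + Notes + ----- + A scalar or list-like ``mapper`` sets the axis name(s) directly; a dict-like or function ``mapper`` raises a ``ValueError`` directing callers to use ``rename`` instead (see the validation below).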
+ """ + axes = {"index": index, "columns": columns} + + if copy is None: + copy = True + + if axis is not None: + axis = self._get_axis_number(axis) + + inplace = validate_bool_kwarg(inplace, "inplace") + + if mapper is not lib.no_default and mapper is not None: + # Use v0.23 behavior if a scalar or list + non_mapper = is_scalar(mapper) or ( + is_list_like(mapper) and not is_dict_like(mapper) + ) + if non_mapper: + return self._set_axis_name(mapper, axis=axis, inplace=inplace) + else: + raise ValueError("Use `.rename` to alter labels with a mapper.") + else: + # Use new behavior. Means that index and/or columns is specified + result = self if inplace else self.copy(deep=copy) + + for axis in range(self.ndim): + v = axes.get(pandas.DataFrame._get_axis_name(axis)) + if v is lib.no_default: + continue + non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) + if non_mapper: + newnames = v + else: + + def _get_rename_function(mapper): + if isinstance(mapper, (dict, BasePandasDataset)): + + def f(x): + if x in mapper: + return mapper[x] + else: + return x + + else: + f = mapper + + return f + + f = _get_rename_function(v) + curnames = self.index.names if axis == 0 else self.columns.names + newnames = [f(name) for name in curnames] + result._set_axis_name(newnames, axis=axis, inplace=True) + if not inplace: + return result + + +# Snowpark pandas has custom dispatch logic for ufuncs, while modin defaults to pandas. +@register_base_override("__array_ufunc__") +def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Apply the `ufunc` to the `BasePandasDataset`. + + Parameters + ---------- + ufunc : np.ufunc + The NumPy ufunc to apply. + method : str + The method to apply. + *inputs : tuple + The inputs to the ufunc. + **kwargs : dict + Additional keyword arguments. + + Returns + ------- + BasePandasDataset + The result of the ufunc applied to the `BasePandasDataset`. + """ + # Use pandas version of ufunc if it exists + if method != "__call__": + # Return sentinel value NotImplemented + return NotImplemented # pragma: no cover + from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import ( + numpy_to_pandas_universal_func_map, + ) + + if ufunc.__name__ in numpy_to_pandas_universal_func_map: + ufunc = numpy_to_pandas_universal_func_map[ufunc.__name__] + return ufunc(self, inputs[1:], kwargs) + # return the sentinel NotImplemented if we do not support this function + return NotImplemented # pragma: no cover + + +# Snowpark pandas does extra argument validation. +@register_base_override("reindex") +def reindex( + self, + index=None, + columns=None, + copy=True, + **kwargs, +): # noqa: PR01, RT01, D200 + """ + Conform `BasePandasDataset` to new index with optional filling logic. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if kwargs.get("limit", None) is not None and kwargs.get("method", None) is None: + raise ValueError( + "limit argument only valid if doing pad, backfill or nearest reindexing" + ) + new_query_compiler = None + if index is not None: + if not isinstance(index, pandas.Index) or not index.equals(self.index): + new_query_compiler = self._query_compiler.reindex( + axis=0, labels=index, **kwargs + ) + if new_query_compiler is None: + new_query_compiler = self._query_compiler + final_query_compiler = None + if columns is not None: + if not isinstance(index, pandas.Index) or not columns.equals(self.columns): + final_query_compiler = new_query_compiler.reindex( + axis=1, labels=columns, **kwargs + ) + if final_query_compiler is None: + final_query_compiler = new_query_compiler + return self._create_or_update_from_compiler( + final_query_compiler, inplace=False if copy is None else not copy + ) + + +# No direct override annotation; used as part of `property`. +# Snowpark pandas may return a custom lazy index object. +def _get_index(self): + """ + Get the index for this DataFrame. + + Returns + ------- + pandas.Index + The union of all indexes across the partitions. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.plugin.extensions.index import Index + + if self._query_compiler.is_multiindex(): + # Lazy multiindex is not supported + return self._query_compiler.index + + idx = Index(query_compiler=self._query_compiler) + idx._set_parent(self) + return idx + + +# No direct override annotation; used as part of `property`. +# Snowpark pandas may return a custom lazy index object. +def _set_index(self, new_index: Axes) -> None: + """ + Set the index for this DataFrame. + + Parameters + ---------- + new_index : pandas.Index + The new index to set this. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + self._update_inplace( + new_query_compiler=self._query_compiler.set_index( + [s._query_compiler for s in self._to_series_list(ensure_index(new_index))] + ) + ) + + +# Snowpark pandas may return a custom lazy index object. 
+register_base_override("index")(property(_get_index, _set_index)) diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index 95fcf684924..808489b8917 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -29,6 +29,7 @@ import modin import numpy as np import pandas as native_pd +from modin.pandas.base import BasePandasDataset from pandas import get_option from pandas._libs import lib from pandas._libs.lib import is_list_like, is_scalar @@ -48,7 +49,6 @@ from pandas.core.dtypes.inference import is_hashable from snowflake.snowpark.modin.pandas import DataFrame, Series -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._internal.timestamp_utils import DateTimeOrigin diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index 0afea30e29a..a33d7702203 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -161,6 +161,7 @@ def plot( @register_series_accessor("transform") +@snowpark_pandas_telemetry_method_decorator @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 pass # pragma: no cover diff --git a/src/snowflake/snowpark/modin/plugin/utils/frontend_constants.py b/src/snowflake/snowpark/modin/plugin/utils/frontend_constants.py new file mode 100644 index 00000000000..f2b28e8bfc1 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/utils/frontend_constants.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +# Do not look up certain attributes in columns or index, as they're used for some +# special purposes, like serving remote context +# TODO: SNOW-1643986 examine whether to update upstream modin to follow this +_ATTRS_NO_LOOKUP = { + "____id_pack__", + "__name__", + "_cache", + "_ipython_canary_method_should_not_exist_", + "_ipython_display_", + "_repr_html_", + "_repr_javascript_", + "_repr_jpeg_", + "_repr_json_", + "_repr_latex_", + "_repr_markdown_", + "_repr_mimebundle_", + "_repr_pdf_", + "_repr_png_", + "_repr_svg_", + "__array_struct__", + "__array_interface__", + "_typ", +} diff --git a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py index 3da545c64b6..f673bf157bf 100644 --- a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py +++ b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py @@ -3,8 +3,9 @@ # from typing import Any, Optional, Union +from modin.pandas.base import BasePandasDataset + import snowflake.snowpark.modin.pandas as pd -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.utils import is_scalar from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage diff --git a/src/snowflake/snowpark/modin/plugin/utils/warning_message.py b/src/snowflake/snowpark/modin/plugin/utils/warning_message.py index 80805d72ac7..8bb85d51751 100644 --- a/src/snowflake/snowpark/modin/plugin/utils/warning_message.py +++ b/src/snowflake/snowpark/modin/plugin/utils/warning_message.py @@ -51,7 +51,7 @@ def ignored_argument(cls, operation: str, argument: str, message: str) -> None: @classmethod def mismatch_with_pandas(cls, operation: str, message: str) -> None: cls.single_warning( - f"`{operation}` implementation has mismatches with pandas:\n{message}." + f"`{operation}` implementation may have mismatches with pandas:\n{message}." ) @classmethod diff --git a/tests/integ/compiler/test_query_generator.py b/tests/integ/compiler/test_query_generator.py index 507b338d6e7..5ce4c005ad3 100644 --- a/tests/integ/compiler/test_query_generator.py +++ b/tests/integ/compiler/test_query_generator.py @@ -197,7 +197,7 @@ def test_table_create_from_large_query_breakdown(session, plan_source_generator) assert ( queries[PlanQueryType.QUERIES][0].sql - == f" CREATE TEMP TABLE {table_name} AS SELECT * FROM (select 1 as a, 2 as b)" + == f" CREATE SCOPED TEMPORARY TABLE {table_name} AS SELECT * FROM (select 1 as a, 2 as b)" ) diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py index 9ae5db98369..cd036bcb045 100644 --- a/tests/integ/modin/binary/test_binary_op.py +++ b/tests/integ/modin/binary/test_binary_op.py @@ -2586,3 +2586,26 @@ def test_df_sub_series(): eval_snowpark_pandas_result( snow_df, native_df, lambda df: df.sub(df["two"], axis="index"), inplace=True ) + + +@sql_count_checker(query_count=2, join_count=0) +def test_binary_op_multi_series_from_same_df(): + native_df = native_pd.DataFrame( + { + "A": [1, 2, 3], + "B": [2, 3, 4], + "C": [4, 5, 6], + "D": [2, 2, 3], + }, + index=["a", "b", "c"], + ) + snow_df = pd.DataFrame(native_df) + # ensure performing more than one binary operation for series coming from same + # dataframe does not produce any join. 
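+ # Sketch of the expectation: df["A"] + df["B"] + df["C"] compiles to a single projection + # over the shared frame rather than a chain of self-joins, which is what the + # join_count=0 assertion above verifies.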
+ eval_snowpark_pandas_result( + snow_df, native_df, lambda df: df["A"] + df["B"] + df["C"] + ) + # perform binary operations in different orders + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: (df["A"] + df["B"]) + (df["C"] + df["D"]) + ) diff --git a/tests/integ/modin/frame/test_astype.py b/tests/integ/modin/frame/test_astype.py index 0c1d1faa31c..8007b264b4e 100644 --- a/tests/integ/modin/frame/test_astype.py +++ b/tests/integ/modin/frame/test_astype.py @@ -126,7 +126,7 @@ def test_astype_to_timedelta(dtype): eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.astype(dtype)) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=0) def test_astype_to_timedelta_negative(): native_datetime_df = native_pd.DataFrame( data={"col1": [pd.to_datetime("2000-01-01"), pd.to_datetime("2001-01-01")]} diff --git a/tests/integ/modin/groupby/test_all_any.py b/tests/integ/modin/groupby/test_all_any.py index 8c8e4e6cb89..e423f28cf0d 100644 --- a/tests/integ/modin/groupby/test_all_any.py +++ b/tests/integ/modin/groupby/test_all_any.py @@ -8,6 +8,7 @@ import modin.pandas as pd import numpy as np +import pandas as native_pd import pytest import snowflake.snowpark.modin.plugin # noqa: F401 @@ -37,6 +38,23 @@ def test_all_any_basic(data): ) +@pytest.mark.parametrize("agg_func", ["all", "any"]) +@pytest.mark.parametrize("by", ["A", "B"]) +@sql_count_checker(query_count=1) +def test_timedelta(agg_func, by): + native_df = native_pd.DataFrame( + { + "A": native_pd.to_timedelta(["1 days 06:05:01.00003", "15.5us", "10"]), + "B": [10, 8, 12], + } + ) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: getattr(df.groupby(by), agg_func)() + ) + + @sql_count_checker(query_count=2) def test_all_any_empty(): data = {"nn": [11, 11, 22, 22]} diff --git a/tests/integ/modin/groupby/test_groupby_basic_agg.py b/tests/integ/modin/groupby/test_groupby_basic_agg.py index 915d098cb90..68cd73ad9ed 100644 --- a/tests/integ/modin/groupby/test_groupby_basic_agg.py +++ b/tests/integ/modin/groupby/test_groupby_basic_agg.py @@ -1094,3 +1094,31 @@ def test_valid_func_valid_kwarg_should_work(basic_snowpark_pandas_df): basic_snowpark_pandas_df.groupby("col1").agg(max, min_count=2), basic_snowpark_pandas_df.to_pandas().groupby("col1").max(min_count=2), ) + + +@pytest.mark.parametrize( + "agg_func", + [ + "count", + "sum", + "mean", + "median", + "std", + ], +) +@pytest.mark.parametrize("by", ["A", "B"]) +@sql_count_checker(query_count=1) +def test_timedelta(agg_func, by): + native_df = native_pd.DataFrame( + { + "A": native_pd.to_timedelta( + ["1 days 06:05:01.00003", "15.5us", "nan", "16us"] + ), + "B": [8, 8, 12, 10], + } + ) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: getattr(df.groupby(by), agg_func)() + ) diff --git a/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py b/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py index d26dfc31f19..bc62278d581 100644 --- a/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py +++ b/tests/integ/modin/groupby/test_groupby_idxmax_idxmin.py @@ -158,3 +158,22 @@ def grouper(c): native_res = df.groupby(by=grouper, axis=1).idxmax(axis=0) snow_res = pd.DataFrame(df).groupby(by=grouper, axis=1).idxmax(axis=0) assert_frame_equal(native_res, snow_res, check_index_type=False) + + +@pytest.mark.parametrize("agg_func", ["idxmin", "idxmax"]) +@pytest.mark.parametrize("by", ["A", "B"]) +@sql_count_checker(query_count=1) +def test_timedelta(agg_func, by): + 
native_df = native_pd.DataFrame( + { + "A": native_pd.to_timedelta( + ["1 days 06:05:01.00003", "15.5us", "nan", "16us"] + ), + "B": [8, 8, 12, 10], + } + ) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: getattr(df.groupby(by), agg_func)() + ) diff --git a/tests/integ/modin/groupby/test_groupby_negative.py b/tests/integ/modin/groupby/test_groupby_negative.py index eeddd0e6c29..a009e1089b0 100644 --- a/tests/integ/modin/groupby/test_groupby_negative.py +++ b/tests/integ/modin/groupby/test_groupby_negative.py @@ -556,3 +556,23 @@ def test_groupby_agg_invalid_min_count( getattr(basic_snowpark_pandas_df.groupby("col1"), min_count_method)( min_count=min_count ) + + +@sql_count_checker(query_count=0) +def test_groupby_var_no_support_for_timedelta(): + native_df = native_pd.DataFrame( + { + "A": native_pd.to_timedelta( + ["1 days 06:05:01.00003", "15.5us", "nan", "16us"] + ), + "B": [8, 8, 12, 10], + } + ) + snow_df = pd.DataFrame(native_df) + with pytest.raises( + NotImplementedError, + match=re.escape( + "SnowflakeQueryCompiler::groupby_agg is not yet implemented for Timedelta Type" + ), + ): + snow_df.groupby("B").var() diff --git a/tests/integ/modin/groupby/test_groupby_nunique.py b/tests/integ/modin/groupby/test_groupby_nunique.py index a70e18bf3cb..e536696ed0d 100644 --- a/tests/integ/modin/groupby/test_groupby_nunique.py +++ b/tests/integ/modin/groupby/test_groupby_nunique.py @@ -80,3 +80,24 @@ def test_groupby_nunique(df, groupby_columns, dropna): {"value1": "count", "value2": "nunique"}, dropna=dropna ), ) + + +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +@sql_count_checker(query_count=1) +def test_timedelta(by): + native_df = native_pd.DataFrame( + { + "A": native_pd.to_timedelta( + ["1 days 06:05:01.00003", "15.5us", "nan", "16us"] + ), + "B": [8, 8, 12, 10], + "C": ["the", "name", "is", "bond"], + } + ) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: df.groupby(by).nunique(), + ) diff --git a/tests/integ/modin/groupby/test_groupby_size.py b/tests/integ/modin/groupby/test_groupby_size.py index 45a353cf420..8d2b9226d04 100644 --- a/tests/integ/modin/groupby/test_groupby_size.py +++ b/tests/integ/modin/groupby/test_groupby_size.py @@ -3,6 +3,7 @@ # import modin.pandas as pd import numpy as np +import pandas as native_pd import pytest import snowflake.snowpark.modin.plugin # noqa: F401 @@ -89,3 +90,24 @@ def test_error_checking(): s = pd.Series(list("abc") * 4) with pytest.raises(NotImplementedError): s.groupby(s).size() + + +@pytest.mark.parametrize("by", ["A", "B"]) +@sql_count_checker(query_count=1) +def test_timedelta(by): + native_df = native_pd.DataFrame( + { + "A": native_pd.to_timedelta( + ["1 days 06:05:01.00003", "15.5us", "nan", "16us"] + ), + "B": [8, 8, 12, 10], + "C": ["the", "name", "is", "bond"], + } + ) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: df.groupby(by).size(), + ) diff --git a/tests/integ/modin/groupby/test_min_max.py b/tests/integ/modin/groupby/test_min_max.py index e5d8ad2cc6a..ce116c55b3a 100644 --- a/tests/integ/modin/groupby/test_min_max.py +++ b/tests/integ/modin/groupby/test_min_max.py @@ -175,3 +175,23 @@ def test_min_max_with_mixed_str_numeric_type(): ) expected_df = expected_df.set_index("col_grp") assert_frame_equal(result_min, expected_df, check_dtype=False) + + +@pytest.mark.parametrize("agg_func", ["min", "max"]) +@pytest.mark.parametrize("by", ["A", "B"]) 
+@sql_count_checker(query_count=1) +def test_timedelta(agg_func, by): + native_df = native_pd.DataFrame( + { + "A": native_pd.to_timedelta( + ["1 days 06:05:01.00003", "15.5us", "nan", "16us"] + ), + "B": [8, 8, 12, 10], + "C": ["the", "name", "is", "bond"], + } + ) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: getattr(df.groupby(by), agg_func)() + ) diff --git a/tests/integ/modin/series/test_astype.py b/tests/integ/modin/series/test_astype.py index 9c00e9a675d..5bbce79b01b 100644 --- a/tests/integ/modin/series/test_astype.py +++ b/tests/integ/modin/series/test_astype.py @@ -418,7 +418,7 @@ def test_astype_to_timedelta(data): ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=0) def test_astype_to_timedelta_negative(): native_datetime_series = native_pd.Series( data=[pd.to_datetime("2000-01-01"), pd.to_datetime("2001-01-01")] ) diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py index 7e11a3537af..c1366c22506 100644 --- a/tests/integ/modin/test_concat.py +++ b/tests/integ/modin/test_concat.py @@ -1063,3 +1063,40 @@ def test_concat_keys(): } snow_df = pd.concat(data.values(), axis=1, keys=data.keys()) assert_frame_equal(snow_df, native_df, check_dtype=False) + + +@sql_count_checker(query_count=4, join_count=0) +def test_concat_series_from_same_df(join): + num_cols = 4 + select_data = [f'{i} as "{i}"' for i in range(num_cols)] + query = f"select {', '.join(select_data)}" + + # Concat today uses join_on_index to concat all series. We use + # read_snowflake here so that the default index is created and + # managed by Snowpark pandas, and is the same as the row position + # column. This creates a valid optimization scenario for join, where a + # join performed on the same row_position column doesn't require an + # actual join. + # This cannot be done with the pd.DataFrame constructor because the index + # and row position column are controlled client side and are + # different columns.
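+ # Sketch of the assumed setup: after read_snowflake, df's default index coincides with + # the engine-managed row position column, so the concat below can align on it without + # emitting a join.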
+ df = pd.read_snowflake(query) + + series = [df[col] for col in df.columns] + final_df = pd.concat(series, join=join, axis=1) + + assert_frame_equal(df, final_df) + + +@sql_count_checker(query_count=4, join_count=0) +def test_df_creation_from_series_from_same_df(): + num_cols = 6 + select_data = [f'{i} as "{i}"' for i in range(num_cols)] + query = f"select {', '.join(select_data)}" + + df = pd.read_snowflake(query) + + df_dict = {col: df[col] for col in df.columns} + final_df = pd.DataFrame(df_dict) + + assert_frame_equal(df, final_df) diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py index 9c24c6b6853..06fbc71eec7 100644 --- a/tests/integ/modin/test_telemetry.py +++ b/tests/integ/modin/test_telemetry.py @@ -398,7 +398,7 @@ def test_telemetry_getitem_setitem(): s = df["a"] assert len(df._query_compiler.snowpark_pandas_api_calls) == 0 assert s._query_compiler.snowpark_pandas_api_calls == [ - {"name": "DataFrame.BasePandasDataset.__getitem__"} + {"name": "DataFrame.__getitem__"} ] df["a"] = 0 df["b"] = 0 @@ -412,12 +412,12 @@ def test_telemetry_getitem_setitem(): # the telemetry log from the connector to validate _ = s[0] data = _extract_snowpark_pandas_telemetry_log_data( - expected_func_name="Series.BasePandasDataset.__getitem__", + expected_func_name="Series.__getitem__", session=s._query_compiler._modin_frame.ordered_dataframe.session, ) assert data["api_calls"] == [ - {"name": "DataFrame.BasePandasDataset.__getitem__"}, - {"name": "Series.BasePandasDataset.__getitem__"}, + {"name": "DataFrame.__getitem__"}, + {"name": "Series.__getitem__"}, ] @@ -547,3 +547,18 @@ def test_telemetry_repr(): {"name": "Series.property.name_set"}, {"name": "Series.Series.__repr__"}, ] + + +@sql_count_checker(query_count=0) +def test_telemetry_copy(): + # copy() is defined in upstream modin's BasePandasDataset class, and not overridden by any + # child class or the extensions module. 
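+ # Hence the api_calls entry below is recorded as "Series.BasePandasDataset.copy" rather + # than "Series.copy".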
+ s = pd.Series([1, 2, 3, 4]) + copied = s.copy() + assert s._query_compiler.snowpark_pandas_api_calls == [ + {"name": "Series.property.name_set"} + ] + assert copied._query_compiler.snowpark_pandas_api_calls == [ + {"name": "Series.property.name_set"}, + {"name": "Series.BasePandasDataset.copy"}, + ] diff --git a/tests/integ/modin/tools/test_to_datetime.py b/tests/integ/modin/tools/test_to_datetime.py index a0ac55958a9..1ea3445d15a 100644 --- a/tests/integ/modin/tools/test_to_datetime.py +++ b/tests/integ/modin/tools/test_to_datetime.py @@ -104,7 +104,7 @@ def test_to_datetime_format(self, cache, box, format, expected): ["1/3/2000", "20000103", "%m/%d/%Y"], ], ) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=0) def test_to_datetime_format_scalar(self, cache, arg, expected, format): result = to_datetime(arg, format=format, cache=cache) expected = Timestamp(expected) @@ -120,7 +120,7 @@ def test_to_datetime_format_scalar(self, cache, arg, expected, format): def test_to_datetime_format_unimplemented(self, cache, arg, format): with pytest.raises(NotImplementedError): assert to_datetime( - arg, format=format, cache=cache + pd.Index([arg]), format=format, cache=cache ) == native_pd.to_datetime(arg, format=format, cache=cache) @pytest.mark.parametrize( @@ -135,7 +135,7 @@ def test_to_datetime_format_not_match(self, cache, arg, format): SnowparkSQLException, match=f"Can't parse '{arg}' as timestamp with format 'DD/MM/YYYY'", ): - to_datetime(arg, format=format, cache=cache) + to_datetime(pd.Index([arg]), format=format, cache=cache).to_pandas() @sql_count_checker(query_count=2, udf_count=0) def test_to_datetime_format_YYYYMMDD(self, cache): @@ -302,7 +302,7 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input, expected): @sql_count_checker(query_count=2) def test_to_datetime_with_NA(self, data, format, expected): # GH#42957 - result = to_datetime(data, format=format) + result = to_datetime(pd.Index(data), format=format) assert_index_equal(result, pd.DatetimeIndex(expected)) @sql_count_checker(query_count=1, udf_count=0) @@ -328,7 +328,7 @@ def test_to_datetime_format_integer_year_month(self, cache): result = to_datetime(ser, format="%Y%m", cache=cache) assert_series_equal(result, expected, check_index_type=False) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=0) def test_to_datetime_format_microsecond(self, cache): month_abbr = calendar.month_abbr[4] val = f"01-{month_abbr}-2011 00:00:01.978" @@ -384,7 +384,9 @@ def test_to_datetime_format_microsecond(self, cache): ) @sql_count_checker(query_count=1) def test_to_datetime_format_time(self, cache, value, format, dt): - assert to_datetime(value, format=format, cache=cache) == dt + assert ( + to_datetime(pd.Index([value]), format=format, cache=cache).to_pandas() == dt + ) @sql_count_checker(query_count=0) def test_to_datetime_with_non_exact_unimplemented(self, cache): @@ -407,9 +409,9 @@ def test_to_datetime_with_non_exact_unimplemented(self, cache): "2012-01-01 09:00:00.001000000", ], ) - @sql_count_checker(query_count=2) + @sql_count_checker(query_count=1, join_count=1) def test_parse_nanoseconds_with_formula(self, cache, arg): - + arg = pd.Index([arg]) # GH8989 # truncating the nanoseconds when a format was provided expected = to_datetime(arg, cache=cache) @@ -426,7 +428,10 @@ def test_parse_nanoseconds_with_formula(self, cache, arg): @sql_count_checker(query_count=0) def test_to_datetime_format_weeks(self, value, fmt, expected, cache): with pytest.raises(NotImplementedError): - assert 
to_datetime(value, format=fmt, cache=cache) == expected + assert ( + to_datetime(pd.Index([value]), format=fmt, cache=cache).to_pandas()[0] + == expected + ) @pytest.mark.parametrize( "fmt,dates,expected_dates", @@ -497,7 +502,7 @@ def test_to_datetime_parse_tzname_or_tzoffset_fallback( ): # GH 13486 with pytest.raises(NotImplementedError): - to_datetime(dates, format=fmt).to_list() + to_datetime(pd.Index(dates), format=fmt).to_list() @sql_count_checker(query_count=4) def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): @@ -535,7 +540,7 @@ def test_to_datetime_parse_timezone_malformed(self, offset): SnowparkSQLException, match="Can't parse|as timestamp with format 'YYYY-MM-DD HH24:MI:SS TZHTZM'", ): - to_datetime([date], format=fmt).to_pandas() + to_datetime(pd.Index([date]), format=fmt).to_pandas() @sql_count_checker(query_count=0) def test_to_datetime_parse_timezone_keeps_name(self): @@ -551,7 +556,7 @@ class TestToDatetime: def test_to_datetime_mixed_datetime_and_string(self): d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1))) d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1))) - res = to_datetime(["2020-01-01 17:00:00 -0100", d2]) + res = to_datetime(pd.Index(["2020-01-01 17:00:00 -0100", d2])) # The input will become a series with variant type and the timezone is unaware by the Snowflake engine, so the # result ignores the timezone by default expected = native_pd.DatetimeIndex( @@ -559,7 +564,7 @@ def test_to_datetime_mixed_datetime_and_string(self): ) assert_index_equal(res, expected) # Set utc=True to make sure timezone aware in to_datetime - res = to_datetime(["2020-01-01 17:00:00 -0100", d2], utc=True) + res = to_datetime(pd.Index(["2020-01-01 17:00:00 -0100", d2]), utc=True) expected = pd.DatetimeIndex([d1, d2]) assert_index_equal(res, expected) @@ -584,15 +589,15 @@ def test_to_datetime_dtarr(self, tz): @sql_count_checker(query_count=1) def test_to_datetime_pydatetime(self): - actual = to_datetime(datetime(2008, 1, 15)) + actual = to_datetime(pd.Index([datetime(2008, 1, 15)])) assert actual == np.datetime64(datetime(2008, 1, 15)) @pytest.mark.parametrize( "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")] ) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=1, join_count=2) def test_to_datetime_dt64s(self, cache, dt): - assert to_datetime(dt, cache=cache) == Timestamp(dt) + assert to_datetime(pd.Index([dt]), cache=cache)[0] == Timestamp(dt) @pytest.mark.parametrize( "sample", @@ -831,11 +836,11 @@ def test_to_datetime_df_negative(self): {"arg": 1490195805433502912, "unit": "ns"}, ], ) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=1, join_count=2) def test_to_datetime_unit(self, sample): - assert pd.to_datetime( - sample["arg"], unit=sample["unit"] - ) == native_pd.to_datetime(sample["arg"], unit=sample["unit"]) + assert pd.to_datetime(pd.Index([sample["arg"]]), unit=sample["unit"])[ + 0 + ] == native_pd.to_datetime(sample["arg"], unit=sample["unit"]) @sql_count_checker(query_count=0) def test_to_datetime_unit_negative(self): diff --git a/tests/integ/modin/types/test_timedelta.py b/tests/integ/modin/types/test_timedelta.py index bcae016cbf0..2e630e09d3d 100644 --- a/tests/integ/modin/types/test_timedelta.py +++ b/tests/integ/modin/types/test_timedelta.py @@ -105,6 +105,6 @@ def test_timedelta_not_supported(): ) with pytest.raises( NotImplementedError, - match="validate_groupby is not yet implemented for Timedelta Type", + match="SnowflakeQueryCompiler::groupby_first is not 
yet implemented for Timedelta Type", ): - df.groupby("a").count() + df.groupby("a").first() diff --git a/tests/integ/modin/types/test_timedelta_indexing.py b/tests/integ/modin/types/test_timedelta_indexing.py index 7c8bbcb8a10..af36b319a26 100644 --- a/tests/integ/modin/types/test_timedelta_indexing.py +++ b/tests/integ/modin/types/test_timedelta_indexing.py @@ -389,3 +389,183 @@ def loc_enlargement(key, item, df): loc_enlargement(key, item, snow_td.copy()).to_pandas().dtypes, snow_td.dtypes, ) + + +@pytest.mark.parametrize( + "key, join_count", + [(2, 2), ([2, 1], 2), (slice(1, None), 0), ([True, False, False, True], 1)], +) +def test_index_get_timedelta(key, join_count): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + with SqlCounter(query_count=1, join_count=join_count): + if is_scalar(key): + assert snow_td_idx[key] == td_idx[key] + else: + eval_snowpark_pandas_result(snow_td_idx, td_idx, lambda idx: idx[key]) + + +@pytest.mark.parametrize( + "key, api, query_count, join_count", + [ + [2, "iat", 1, 2], + [native_pd.Timedelta("1 days 1 hour"), "at", 2, 2], + [[2, 1], "iloc", 1, 2], + [ + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("1 days 1 hour"), + ], + "loc", + 1, + 1, + ], + [slice(1, None), "iloc", 1, 0], + [[True, False, False, True], "iloc", 1, 1], + [[True, False, False, True], "loc", 1, 1], + ], +) +def test_series_with_timedelta_index(key, api, query_count, join_count): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + data = [1, 2, 3, 4] + native_series = native_pd.Series(data, index=td_idx) + snow_series = pd.Series(data, index=snow_td_idx) + + with SqlCounter(query_count=query_count, join_count=join_count): + if is_scalar(key): + assert getattr(snow_series, api)[key] == getattr(native_series, api)[key] + else: + eval_snowpark_pandas_result( + snow_series, native_series, lambda s: getattr(s, api)[key] + ) + + +@pytest.mark.parametrize( + "key, api, query_count, join_count", + [ + [2, "iat", 1, 2], + [native_pd.Timedelta("1 days 1 hour"), "at", 2, 2], + [[2, 1], "iloc", 1, 2], + [ + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("1 days 1 hour"), + ], + "loc", + 1, + 1, + ], + [slice(1, None), "iloc", 1, 0], + [[True, False, False, True], "iloc", 1, 1], + [[True, False, False, True], "loc", 1, 1], + ], +) +def test_df_with_timedelta_index(key, api, query_count, join_count): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + data = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + native_df = native_pd.DataFrame(data, index=td_idx) + snow_df = pd.DataFrame(data, index=snow_td_idx) + + with SqlCounter(query_count=query_count, join_count=join_count): + if is_scalar(key): + assert getattr(snow_df, api)[key, 0] == getattr(native_df, api)[key, 0] + else: + eval_snowpark_pandas_result( + snow_df, native_df, lambda s: getattr(s, api)[key] + ) + + +def 
test_df_with_timedelta_index_enlargement_during_indexing(): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + data = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + cols = ["a", "b", "c", "d"] + native_df = native_pd.DataFrame(data, index=td_idx, columns=cols) + snow_df = pd.DataFrame(data, index=snow_td_idx, columns=cols) + + def setitem_enlargement(key, item, df): + df[key] = item + return df + + item = 23 + + key = native_pd.Timedelta("2 days") + with SqlCounter(query_count=1, join_count=0): + eval_snowpark_pandas_result( + snow_df.copy(), + native_df.copy(), + functools.partial(setitem_enlargement, key, item), + ) + + key = native_pd.Timedelta("2 days 45 minutes") + with SqlCounter(query_count=1, join_count=1): + eval_snowpark_pandas_result( + snow_df["a"].copy(), + native_df["a"].copy(), + functools.partial(setitem_enlargement, key, item), + ) + + def loc_enlargement(key, item, df): + df.loc[key] = item + return df + + key = (slice(None, None, None), "x") + + with SqlCounter(query_count=1, join_count=0): + eval_snowpark_pandas_result( + snow_df.copy(), + native_df.copy(), + functools.partial(loc_enlargement, key, item), + ) + + key = native_pd.Timedelta("2 days 25 minutes") + with SqlCounter(query_count=1, join_count=1): + eval_snowpark_pandas_result( + snow_df["a"].copy(), + native_df["a"].copy(), + functools.partial(loc_enlargement, key, item), + ) + + # single row + key = (native_pd.Timedelta("2 days 45 minutes"), slice(None, None, None)) + + with SqlCounter(query_count=1, join_count=1): + eval_snowpark_pandas_result( + snow_df.copy(), + native_df.copy(), + functools.partial(loc_enlargement, key, item), + ) diff --git a/tests/integ/scala/test_snowflake_plan_suite.py b/tests/integ/scala/test_snowflake_plan_suite.py index 25ee097d27b..e5971e2d2f5 100644 --- a/tests/integ/scala/test_snowflake_plan_suite.py +++ b/tests/integ/scala/test_snowflake_plan_suite.py @@ -317,7 +317,7 @@ def test_create_scoped_temp_table(session): ) .queries[0] .sql - == f" CREATE TEMP TABLE {temp_table_name} AS SELECT * FROM ( SELECT * FROM ({table_name}))" + == f" CREATE TEMPORARY TABLE {temp_table_name} AS SELECT * FROM ( SELECT * FROM ({table_name}))" ) expected_sql = f' CREATE TEMPORARY TABLE {temp_table_name}("NUM" BIGINT, "STR" STRING(8))' assert expected_sql in ( @@ -342,7 +342,9 @@ def test_create_scoped_temp_table(session): .queries[0] .sql ) - expected_sql = f" CREATE TEMPORARY TABLE {temp_table_name} AS SELECT" + expected_sql = ( + f" CREATE SCOPED TEMPORARY TABLE {temp_table_name} AS SELECT" + ) assert expected_sql in ( session._plan_builder.save_as_table( table_name=[temp_table_name], diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 4cf335678e4..0576198c03c 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -3015,6 +3015,31 @@ def test_create_dynamic_table(session, table_name_1, is_transient): Utils.drop_dynamic_table(session, dt_name) +@pytest.mark.xfail( + "config.getoption('local_testing_mode', default=False)", + reason="Dynamic table is a SQL feature", + run=False, +) +def test_create_dynamic_table_with_explode(session): + dt_name = Utils.random_name_for_temp_object(TempObjectType.DYNAMIC_TABLE) + temp_table = Utils.random_name_for_temp_object(TempObjectType.TABLE) + try: + df = 
session.create_dataframe( + [[1, [1, 2, 3]], [2, [11, 22]]], schema=["idx", "lists"] + ) + df.write.mode("overwrite").save_as_table(temp_table) + df = session.table(temp_table) + df1 = df.select(df.idx, explode(df.lists)) + df1.create_or_replace_dynamic_table( + dt_name, warehouse=session.get_current_warehouse(), lag="1 min" + ) + session.sql(f"alter dynamic table {dt_name} refresh").collect() + res = session.sql(f"show dynamic tables like '{dt_name}'").collect() + assert len(res) == 1 + finally: + Utils.drop_table(session, temp_table) + + @pytest.mark.xfail( "config.getoption('local_testing_mode', default=False)", reason="Dynamic table is a SQL feature", diff --git a/tests/integ/test_large_query_breakdown.py b/tests/integ/test_large_query_breakdown.py index 1368bf460f2..fb4d5517b98 100644 --- a/tests/integ/test_large_query_breakdown.py +++ b/tests/integ/test_large_query_breakdown.py @@ -47,6 +47,7 @@ def setup(session): cte_optimization_enabled = session._cte_optimization_enabled is_query_compilation_stage_enabled = session._query_compilation_stage_enabled session._query_compilation_stage_enabled = True + session._large_query_breakdown_enabled = True yield session._query_compilation_stage_enabled = is_query_compilation_stage_enabled session._cte_optimization_enabled = cte_optimization_enabled @@ -77,11 +78,32 @@ def check_result_with_and_without_breakdown(session, df): session._large_query_breakdown_enabled = large_query_enabled +def test_no_valid_nodes_found(session, large_query_df, caplog): + """Test that large query breakdown is skipped when no valid node for partitioning is found""" + set_bounds(300, 600) + + base_df = session.sql("select 1 as A, 2 as B") + df1 = base_df.with_column("A", col("A") + lit(1)) + df2 = base_df.with_column("B", col("B") + lit(1)) + + for i in range(102): + df1 = df1.with_column("A", col("A") + lit(i)) + df2 = df2.with_column("B", col("B") + lit(i)) + + union_df = df1.union_all(df2) + final_df = union_df.with_column("A", col("A") + lit(1)) + + with caplog.at_level(logging.DEBUG): + queries = final_df.queries + assert len(queries["queries"]) == 1, queries["queries"] + assert len(queries["post_actions"]) == 0, queries["post_actions"] + assert "Could not find a valid node for partitioning" in caplog.text + + def test_large_query_breakdown_with_cte_optimization(session): """Test large query breakdown works with cte optimized plan""" set_bounds(300, 600) session._cte_optimization_enabled = True - session._large_query_breakdown_enabled = True df0 = session.sql("select 2 as b, 32 as c") df1 = session.sql("select 1 as a, 2 as b").filter(col("a") == 1) df1 = df1.join(df0, on=["b"], how="inner") @@ -99,7 +121,7 @@ def test_large_query_breakdown_with_cte_optimization(session): check_result_with_and_without_breakdown(session, df4) assert len(df4.queries["queries"]) == 2 - assert df4.queries["queries"][0].startswith("CREATE TEMP TABLE") + assert df4.queries["queries"][0].startswith("CREATE SCOPED TEMPORARY TABLE") assert df4.queries["queries"][1].startswith("WITH SNOWPARK_TEMP_CTE_") assert len(df4.queries["post_actions"]) == 1 @@ -108,14 +130,13 @@ def test_save_as_table(session, large_query_df): set_bounds(300, 600) - session._large_query_breakdown_enabled = True table_name = Utils.random_table_name() with session.query_history() as history: large_query_df.write.save_as_table(table_name, mode="overwrite") assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert 
history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith( f"CREATE OR REPLACE TABLE {table_name}" ) @@ -135,7 +156,7 @@ def test_update_delete_merge(session, large_query_df): t.update({"B": 0}, t.a == large_query_df.a, large_query_df) assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"UPDATE {table_name}") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") @@ -144,7 +165,7 @@ def test_update_delete_merge(session, large_query_df): t.delete(t.a == large_query_df.a, large_query_df) assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"DELETE FROM {table_name} USING") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") @@ -157,14 +178,13 @@ def test_update_delete_merge(session, large_query_df): ) assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"MERGE INTO {table_name} USING") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") def test_copy_into_location(session, large_query_df): set_bounds(300, 600) - session._large_query_breakdown_enabled = True remote_file_path = f"{session.get_session_stage()}/df.parquet" with session.query_history() as history: large_query_df.write.copy_into_location( @@ -176,14 +196,13 @@ def test_copy_into_location(session, large_query_df): ) assert len(history.queries) == 4, history.queries assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"COPY INTO '{remote_file_path}'") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") def test_pivot_unpivot(session): set_bounds(300, 600) - session._large_query_breakdown_enabled = True session.sql( """create or replace temp table monthly_sales(A int, B int, month text) as select * from values @@ -215,7 +234,7 @@ def test_pivot_unpivot(session): plan_queries = final_df.queries assert len(plan_queries["queries"]) == 2 - assert plan_queries["queries"][0].startswith("CREATE TEMP TABLE") + assert plan_queries["queries"][0].startswith("CREATE SCOPED TEMPORARY TABLE") assert len(plan_queries["post_actions"]) == 1 assert plan_queries["post_actions"][0].startswith("DROP TABLE If EXISTS") @@ -223,7 +242,6 @@ def test_pivot_unpivot(session): def test_sort(session): set_bounds(300, 600) - session._large_query_breakdown_enabled = True base_df = session.sql("select 1 as A, 2 as B") df1 = base_df.with_column("A", col("A") + lit(1)) df2 = base_df.with_column("B", col("B") + lit(1)) @@ -239,7 +257,7 @@ def test_sort(session): plan_queries = final_df.queries assert len(plan_queries["queries"]) == 2 - 
assert plan_queries["queries"][0].startswith("CREATE TEMP TABLE") + assert plan_queries["queries"][0].startswith("CREATE SCOPED TEMPORARY TABLE") assert len(plan_queries["post_actions"]) == 1 assert plan_queries["post_actions"][0].startswith("DROP TABLE If EXISTS") @@ -258,7 +276,6 @@ def test_sort(session): def test_multiple_query_plan(session, large_query_df): set_bounds(300, 600) original_threshold = analyzer.ARRAY_BIND_THRESHOLD - session._large_query_breakdown_enabled = True try: analyzer.ARRAY_BIND_THRESHOLD = 2 base_df = session.create_dataframe([[1, 2], [3, 4]], schema=["A", "B"]) @@ -283,7 +300,7 @@ def test_multiple_query_plan(session, large_query_df): "CREATE OR REPLACE SCOPED TEMPORARY TABLE" ) assert plan_queries["queries"][1].startswith("INSERT INTO") - assert plan_queries["queries"][2].startswith("CREATE TEMP TABLE") + assert plan_queries["queries"][2].startswith("CREATE SCOPED TEMPORARY TABLE") assert len(plan_queries["post_actions"]) == 2 for query in plan_queries["post_actions"]: @@ -296,7 +313,6 @@ def test_multiple_query_plan(session, large_query_df): def test_optimization_skipped_with_transaction(session, large_query_df, caplog): """Test large query breakdown is skipped when transaction is enabled""" set_bounds(300, 600) - session._large_query_breakdown_enabled = True session.sql("begin").collect() assert Utils.is_active_transaction(session) with caplog.at_level(logging.DEBUG): @@ -316,7 +332,6 @@ def test_optimization_skipped_with_views_and_dynamic_tables(session, caplog): source_table = Utils.random_table_name() table_name = Utils.random_table_name() view_name = Utils.random_view_name() - session._large_query_breakdown_enabled = True try: session.sql("select 1 as a, 2 as b").write.save_as_table(source_table) df = session.table(source_table) @@ -344,12 +359,13 @@ def test_optimization_skipped_with_views_and_dynamic_tables(session, caplog): def test_async_job_with_large_query_breakdown(session, large_query_df): """Test large query breakdown gives same result for async and non-async jobs""" set_bounds(300, 600) - session._large_query_breakdown_enabled = True job = large_query_df.collect(block=False) result = job.result() assert result == large_query_df.collect() assert len(large_query_df.queries["queries"]) == 2 - assert large_query_df.queries["queries"][0].startswith("CREATE TEMP TABLE") + assert large_query_df.queries["queries"][0].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) assert len(large_query_df.queries["post_actions"]) == 1 assert large_query_df.queries["post_actions"][0].startswith( @@ -362,20 +378,24 @@ def test_complexity_bounds_affect_num_partitions(session, large_query_df): Also test that when partitions are added, drop table queries are added. 
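    Tightening the upper bound below the plan's complexity score forces one more partition: an extra CREATE SCOPED TEMPORARY TABLE query with a matching DROP TABLE post action.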
""" set_bounds(300, 600) - session._large_query_breakdown_enabled = True assert len(large_query_df.queries["queries"]) == 2 assert len(large_query_df.queries["post_actions"]) == 1 - assert large_query_df.queries["queries"][0].startswith("CREATE TEMP TABLE") + assert large_query_df.queries["queries"][0].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) assert large_query_df.queries["post_actions"][0].startswith( "DROP TABLE If EXISTS" ) set_bounds(300, 412) - session._large_query_breakdown_enabled = True assert len(large_query_df.queries["queries"]) == 3 assert len(large_query_df.queries["post_actions"]) == 2 - assert large_query_df.queries["queries"][0].startswith("CREATE TEMP TABLE") - assert large_query_df.queries["queries"][1].startswith("CREATE TEMP TABLE") + assert large_query_df.queries["queries"][0].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) + assert large_query_df.queries["queries"][1].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) assert large_query_df.queries["post_actions"][0].startswith( "DROP TABLE If EXISTS" ) diff --git a/tests/notebooks/modin/MIMICHealthcareDemo.ipynb b/tests/notebooks/modin/MIMICHealthcareDemo.ipynb index 95a75e3c858..52388fe7ddd 100644 --- a/tests/notebooks/modin/MIMICHealthcareDemo.ipynb +++ b/tests/notebooks/modin/MIMICHealthcareDemo.ipynb @@ -34,10 +34,10 @@ "id": "90243e71-4cf0-4971-a95e-3f29e12449fc", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:35.536214Z", - "iopub.status.busy": "2024-08-28T17:27:35.535897Z", - "iopub.status.idle": "2024-08-28T17:27:36.977905Z", - "shell.execute_reply": "2024-08-28T17:27:36.977472Z" + "iopub.execute_input": "2024-08-29T20:52:59.781777Z", + "iopub.status.busy": "2024-08-29T20:52:59.781651Z", + "iopub.status.idle": "2024-08-29T20:53:01.465309Z", + "shell.execute_reply": "2024-08-29T20:53:01.464055Z" }, "tags": [] }, @@ -70,10 +70,10 @@ "id": "c309356f-14f8-469a-9257-b944b8951410", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:36.980268Z", - "iopub.status.busy": "2024-08-28T17:27:36.980102Z", - "iopub.status.idle": "2024-08-28T17:27:45.691050Z", - "shell.execute_reply": "2024-08-28T17:27:45.690724Z" + "iopub.execute_input": "2024-08-29T20:53:01.474913Z", + "iopub.status.busy": "2024-08-29T20:53:01.473383Z", + "iopub.status.idle": "2024-08-29T20:53:09.493517Z", + "shell.execute_reply": "2024-08-29T20:53:09.491938Z" }, "tags": [] }, @@ -97,10 +97,10 @@ "id": "68823bb5-fcd1-4f92-b767-e5ac83dc3df7", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:45.693385Z", - "iopub.status.busy": "2024-08-28T17:27:45.693251Z", - "iopub.status.idle": "2024-08-28T17:27:46.018818Z", - "shell.execute_reply": "2024-08-28T17:27:46.018231Z" + "iopub.execute_input": "2024-08-29T20:53:09.501294Z", + "iopub.status.busy": "2024-08-29T20:53:09.500816Z", + "iopub.status.idle": "2024-08-29T20:53:10.389392Z", + "shell.execute_reply": "2024-08-29T20:53:10.388512Z" }, "tags": [] }, @@ -145,10 +145,10 @@ "id": "9a7fc3b9-50db-49da-a18a-8865a3356f31", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:46.022960Z", - "iopub.status.busy": "2024-08-28T17:27:46.022736Z", - "iopub.status.idle": "2024-08-28T17:27:49.916885Z", - "shell.execute_reply": "2024-08-28T17:27:49.916624Z" + "iopub.execute_input": "2024-08-29T20:53:10.397141Z", + "iopub.status.busy": "2024-08-29T20:53:10.396693Z", + "iopub.status.idle": "2024-08-29T20:53:18.519633Z", + "shell.execute_reply": "2024-08-29T20:53:18.518329Z" }, "tags": [] }, @@ -331,10 +331,10 @@ "id": 
"7692a0af-de2f-42d1-9110-15ce104c2c5c", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:49.918782Z", - "iopub.status.busy": "2024-08-28T17:27:49.918678Z", - "iopub.status.idle": "2024-08-28T17:27:50.561066Z", - "shell.execute_reply": "2024-08-28T17:27:50.560658Z" + "iopub.execute_input": "2024-08-29T20:53:18.525954Z", + "iopub.status.busy": "2024-08-29T20:53:18.525686Z", + "iopub.status.idle": "2024-08-29T20:53:19.864003Z", + "shell.execute_reply": "2024-08-29T20:53:19.863649Z" }, "tags": [] }, @@ -390,10 +390,10 @@ "id": "5344da61-915d-43cf-894a-484876450748", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:50.563582Z", - "iopub.status.busy": "2024-08-28T17:27:50.563395Z", - "iopub.status.idle": "2024-08-28T17:27:50.768782Z", - "shell.execute_reply": "2024-08-28T17:27:50.768309Z" + "iopub.execute_input": "2024-08-29T20:53:19.866177Z", + "iopub.status.busy": "2024-08-29T20:53:19.866034Z", + "iopub.status.idle": "2024-08-29T20:53:20.363772Z", + "shell.execute_reply": "2024-08-29T20:53:20.363248Z" } }, "outputs": [ @@ -401,8 +401,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation has mismatches with pandas:\n", - "Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. In this case Snowflake's auto format may yield different result values compared to pandas..\n" + "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation may have mismatches with pandas:\n", + "Snowflake automatic format detection is used when a format is not provided. In this case Snowflake's auto format may yield different result values compared to pandas.See https://docs.snowflake.com/en/sql-reference/date-time-input-output#supported-formats-for-auto-detection for details.\n" ] } ], @@ -428,10 +428,10 @@ "id": "5f72ca6b-ae9a-4a68-a391-83b065785004", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:50.770869Z", - "iopub.status.busy": "2024-08-28T17:27:50.770722Z", - "iopub.status.idle": "2024-08-28T17:27:50.888703Z", - "shell.execute_reply": "2024-08-28T17:27:50.888387Z" + "iopub.execute_input": "2024-08-29T20:53:20.366126Z", + "iopub.status.busy": "2024-08-29T20:53:20.365983Z", + "iopub.status.idle": "2024-08-29T20:53:20.562742Z", + "shell.execute_reply": "2024-08-29T20:53:20.562425Z" }, "tags": [] }, @@ -446,10 +446,10 @@ "id": "ecc19928-1d3a-49b8-bc0d-4270e53bfc4c", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:50.890752Z", - "iopub.status.busy": "2024-08-28T17:27:50.890619Z", - "iopub.status.idle": "2024-08-28T17:27:51.188395Z", - "shell.execute_reply": "2024-08-28T17:27:51.188083Z" + "iopub.execute_input": "2024-08-29T20:53:20.565128Z", + "iopub.status.busy": "2024-08-29T20:53:20.564972Z", + "iopub.status.idle": "2024-08-29T20:53:21.237178Z", + "shell.execute_reply": "2024-08-29T20:53:21.236687Z" } }, "outputs": [], @@ -471,10 +471,10 @@ "id": "50c62f3f-a804-4efd-89bb-cf689a870055", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:51.190704Z", - "iopub.status.busy": "2024-08-28T17:27:51.190570Z", - "iopub.status.idle": "2024-08-28T17:27:51.563926Z", - "shell.execute_reply": "2024-08-28T17:27:51.563299Z" + "iopub.execute_input": "2024-08-29T20:53:21.239635Z", + "iopub.status.busy": "2024-08-29T20:53:21.239465Z", + "iopub.status.idle": "2024-08-29T20:53:21.790821Z", + "shell.execute_reply": 
"2024-08-29T20:53:21.790376Z" }, "tags": [] }, @@ -499,10 +499,10 @@ "id": "66ac1e04-4581-4292-8b7a-b88faa76edf5", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:51.567168Z", - "iopub.status.busy": "2024-08-28T17:27:51.566843Z", - "iopub.status.idle": "2024-08-28T17:27:52.325449Z", - "shell.execute_reply": "2024-08-28T17:27:52.325162Z" + "iopub.execute_input": "2024-08-29T20:53:21.793183Z", + "iopub.status.busy": "2024-08-29T20:53:21.793045Z", + "iopub.status.idle": "2024-08-29T20:53:23.191350Z", + "shell.execute_reply": "2024-08-29T20:53:23.191053Z" }, "tags": [] }, @@ -569,10 +569,10 @@ "id": "17b76fe7-4d6d-4eb4-bebe-55cc643b69f3", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:52.327478Z", - "iopub.status.busy": "2024-08-28T17:27:52.327310Z", - "iopub.status.idle": "2024-08-28T17:27:55.549227Z", - "shell.execute_reply": "2024-08-28T17:27:55.548770Z" + "iopub.execute_input": "2024-08-29T20:53:23.201474Z", + "iopub.status.busy": "2024-08-29T20:53:23.201235Z", + "iopub.status.idle": "2024-08-29T20:53:27.315733Z", + "shell.execute_reply": "2024-08-29T20:53:27.314718Z" }, "tags": [] }, @@ -595,10 +595,10 @@ "id": "8514feca-f6b3-4186-bd32-ef07ba8efed4", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:55.552055Z", - "iopub.status.busy": "2024-08-28T17:27:55.551878Z", - "iopub.status.idle": "2024-08-28T17:27:55.941773Z", - "shell.execute_reply": "2024-08-28T17:27:55.941284Z" + "iopub.execute_input": "2024-08-29T20:53:27.325717Z", + "iopub.status.busy": "2024-08-29T20:53:27.324858Z", + "iopub.status.idle": "2024-08-29T20:53:28.100711Z", + "shell.execute_reply": "2024-08-29T20:53:28.099954Z" }, "tags": [] }, @@ -613,10 +613,10 @@ "id": "bf8025c3-8657-41a7-8feb-6afab251ccfd", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:55.944225Z", - "iopub.status.busy": "2024-08-28T17:27:55.944051Z", - "iopub.status.idle": "2024-08-28T17:27:56.081283Z", - "shell.execute_reply": "2024-08-28T17:27:56.080891Z" + "iopub.execute_input": "2024-08-29T20:53:28.106124Z", + "iopub.status.busy": "2024-08-29T20:53:28.105739Z", + "iopub.status.idle": "2024-08-29T20:53:28.469703Z", + "shell.execute_reply": "2024-08-29T20:53:28.469403Z" }, "tags": [] }, @@ -642,10 +642,10 @@ "id": "60ba61f7-fa60-4a6d-8b06-1282d2f64382", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:56.083562Z", - "iopub.status.busy": "2024-08-28T17:27:56.083425Z", - "iopub.status.idle": "2024-08-28T17:27:56.085148Z", - "shell.execute_reply": "2024-08-28T17:27:56.084867Z" + "iopub.execute_input": "2024-08-29T20:53:28.471863Z", + "iopub.status.busy": "2024-08-29T20:53:28.471732Z", + "iopub.status.idle": "2024-08-29T20:53:28.473440Z", + "shell.execute_reply": "2024-08-29T20:53:28.473133Z" }, "tags": [] }, @@ -661,10 +661,10 @@ "id": "5cdeb9af-660a-4daa-98c5-f9e86699e9bd", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:56.086699Z", - "iopub.status.busy": "2024-08-28T17:27:56.086595Z", - "iopub.status.idle": "2024-08-28T17:27:57.259523Z", - "shell.execute_reply": "2024-08-28T17:27:57.259142Z" + "iopub.execute_input": "2024-08-29T20:53:28.475189Z", + "iopub.status.busy": "2024-08-29T20:53:28.475066Z", + "iopub.status.idle": "2024-08-29T20:53:30.036155Z", + "shell.execute_reply": "2024-08-29T20:53:30.035460Z" }, "tags": [] }, @@ -704,10 +704,10 @@ "id": "2b704957-4b20-41a9-abbb-1d963a0ea0d2", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:57.261881Z", - "iopub.status.busy": 
"2024-08-28T17:27:57.261730Z", - "iopub.status.idle": "2024-08-28T17:27:57.985080Z", - "shell.execute_reply": "2024-08-28T17:27:57.984756Z" + "iopub.execute_input": "2024-08-29T20:53:30.042861Z", + "iopub.status.busy": "2024-08-29T20:53:30.042419Z", + "iopub.status.idle": "2024-08-29T20:53:30.738218Z", + "shell.execute_reply": "2024-08-29T20:53:30.736870Z" }, "tags": [] }, @@ -754,10 +754,10 @@ "id": "1748639f-04b5-45e6-b836-2433b66fa29d", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:57.986888Z", - "iopub.status.busy": "2024-08-28T17:27:57.986758Z", - "iopub.status.idle": "2024-08-28T17:27:59.498296Z", - "shell.execute_reply": "2024-08-28T17:27:59.498013Z" + "iopub.execute_input": "2024-08-29T20:53:30.743369Z", + "iopub.status.busy": "2024-08-29T20:53:30.743214Z", + "iopub.status.idle": "2024-08-29T20:53:32.247987Z", + "shell.execute_reply": "2024-08-29T20:53:32.245175Z" }, "tags": [] }, @@ -799,10 +799,10 @@ "id": "24a34764-f442-4cc1-8b87-ed96ace34651", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:59.500159Z", - "iopub.status.busy": "2024-08-28T17:27:59.500025Z", - "iopub.status.idle": "2024-08-28T17:28:00.076867Z", - "shell.execute_reply": "2024-08-28T17:28:00.076522Z" + "iopub.execute_input": "2024-08-29T20:53:32.251992Z", + "iopub.status.busy": "2024-08-29T20:53:32.251663Z", + "iopub.status.idle": "2024-08-29T20:53:33.298148Z", + "shell.execute_reply": "2024-08-29T20:53:33.297812Z" }, "tags": [] }, @@ -834,10 +834,10 @@ "id": "96753257-acd4-4ba9-b81b-19dc0a2af53c", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:00.079097Z", - "iopub.status.busy": "2024-08-28T17:28:00.078936Z", - "iopub.status.idle": "2024-08-28T17:28:00.081233Z", - "shell.execute_reply": "2024-08-28T17:28:00.080958Z" + "iopub.execute_input": "2024-08-29T20:53:33.300115Z", + "iopub.status.busy": "2024-08-29T20:53:33.299993Z", + "iopub.status.idle": "2024-08-29T20:53:33.302005Z", + "shell.execute_reply": "2024-08-29T20:53:33.301532Z" }, "tags": [] }, @@ -871,10 +871,10 @@ "id": "2d26eee2-671a-4ff8-ac22-62612c1a1ced", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:00.083739Z", - "iopub.status.busy": "2024-08-28T17:28:00.083616Z", - "iopub.status.idle": "2024-08-28T17:28:00.944153Z", - "shell.execute_reply": "2024-08-28T17:28:00.943809Z" + "iopub.execute_input": "2024-08-29T20:53:33.315185Z", + "iopub.status.busy": "2024-08-29T20:53:33.315012Z", + "iopub.status.idle": "2024-08-29T20:53:34.722137Z", + "shell.execute_reply": "2024-08-29T20:53:34.721832Z" }, "tags": [] }, @@ -916,10 +916,10 @@ "id": "21aef8ae-47d8-4c77-8e04-270304c41d4e", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:00.946550Z", - "iopub.status.busy": "2024-08-28T17:28:00.946409Z", - "iopub.status.idle": "2024-08-28T17:28:02.622587Z", - "shell.execute_reply": "2024-08-28T17:28:02.622199Z" + "iopub.execute_input": "2024-08-29T20:53:34.724622Z", + "iopub.status.busy": "2024-08-29T20:53:34.724479Z", + "iopub.status.idle": "2024-08-29T20:53:37.680974Z", + "shell.execute_reply": "2024-08-29T20:53:37.680658Z" }, "tags": [] }, @@ -958,10 +958,10 @@ "id": "2d11b951-5b4c-4a98-ae4c-883fbccd56a7", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:02.624633Z", - "iopub.status.busy": "2024-08-28T17:28:02.624483Z", - "iopub.status.idle": "2024-08-28T17:28:02.933061Z", - "shell.execute_reply": "2024-08-28T17:28:02.932626Z" + "iopub.execute_input": "2024-08-29T20:53:37.683001Z", + "iopub.status.busy": 
"2024-08-29T20:53:37.682857Z", + "iopub.status.idle": "2024-08-29T20:53:38.194095Z", + "shell.execute_reply": "2024-08-29T20:53:38.193778Z" }, "tags": [] }, @@ -977,10 +977,10 @@ "id": "35155531-c8ff-4ed1-9a3e-e457176f9f20", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:02.935312Z", - "iopub.status.busy": "2024-08-28T17:28:02.935204Z", - "iopub.status.idle": "2024-08-28T17:28:04.421197Z", - "shell.execute_reply": "2024-08-28T17:28:04.420876Z" + "iopub.execute_input": "2024-08-29T20:53:38.196253Z", + "iopub.status.busy": "2024-08-29T20:53:38.196113Z", + "iopub.status.idle": "2024-08-29T20:53:40.521366Z", + "shell.execute_reply": "2024-08-29T20:53:40.520687Z" }, "tags": [] }, @@ -1139,10 +1139,10 @@ "id": "b8c41494-755a-485b-8119-9dfff98213df", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:04.423275Z", - "iopub.status.busy": "2024-08-28T17:28:04.423133Z", - "iopub.status.idle": "2024-08-28T17:28:05.150707Z", - "shell.execute_reply": "2024-08-28T17:28:05.150379Z" + "iopub.execute_input": "2024-08-29T20:53:40.525624Z", + "iopub.status.busy": "2024-08-29T20:53:40.525457Z", + "iopub.status.idle": "2024-08-29T20:53:41.863763Z", + "shell.execute_reply": "2024-08-29T20:53:41.862737Z" }, "tags": [] }, @@ -1192,10 +1192,10 @@ "metadata": { "collapsed": false, "execution": { - "iopub.execute_input": "2024-08-28T17:28:05.152844Z", - "iopub.status.busy": "2024-08-28T17:28:05.152694Z", - "iopub.status.idle": "2024-08-28T17:28:07.249455Z", - "shell.execute_reply": "2024-08-28T17:28:07.248760Z" + "iopub.execute_input": "2024-08-29T20:53:41.868179Z", + "iopub.status.busy": "2024-08-29T20:53:41.868013Z", + "iopub.status.idle": "2024-08-29T20:53:45.387411Z", + "shell.execute_reply": "2024-08-29T20:53:45.386513Z" }, "jupyter": { "outputs_hidden": false @@ -1212,10 +1212,10 @@ "id": "719049a4-0a5b-45da-bbd5-8ff073c95a93", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.253088Z", - "iopub.status.busy": "2024-08-28T17:28:07.252690Z", - "iopub.status.idle": "2024-08-28T17:28:07.972094Z", - "shell.execute_reply": "2024-08-28T17:28:07.971764Z" + "iopub.execute_input": "2024-08-29T20:53:45.394651Z", + "iopub.status.busy": "2024-08-29T20:53:45.394283Z", + "iopub.status.idle": "2024-08-29T20:53:48.805198Z", + "shell.execute_reply": "2024-08-29T20:53:48.804855Z" }, "tags": [] }, @@ -1240,10 +1240,10 @@ "id": "9e1f2052-7405-496c-b4de-76e031978cb5", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.974120Z", - "iopub.status.busy": "2024-08-28T17:28:07.973931Z", - "iopub.status.idle": "2024-08-28T17:28:07.979210Z", - "shell.execute_reply": "2024-08-28T17:28:07.978966Z" + "iopub.execute_input": "2024-08-29T20:53:48.807541Z", + "iopub.status.busy": "2024-08-29T20:53:48.807331Z", + "iopub.status.idle": "2024-08-29T20:53:48.813010Z", + "shell.execute_reply": "2024-08-29T20:53:48.812749Z" }, "tags": [] }, @@ -1678,10 +1678,10 @@ "id": "dcb50a0d-3f66-4376-a383-597789f83fa0", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.980853Z", - "iopub.status.busy": "2024-08-28T17:28:07.980748Z", - "iopub.status.idle": "2024-08-28T17:28:07.983038Z", - "shell.execute_reply": "2024-08-28T17:28:07.982709Z" + "iopub.execute_input": "2024-08-29T20:53:48.824720Z", + "iopub.status.busy": "2024-08-29T20:53:48.824554Z", + "iopub.status.idle": "2024-08-29T20:53:48.827307Z", + "shell.execute_reply": "2024-08-29T20:53:48.826940Z" }, "tags": [] }, @@ -1704,10 +1704,10 @@ "id": "4993b18c-7d2a-49b6-96f5-b4a7c6a38cc2", 
"metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.984580Z", - "iopub.status.busy": "2024-08-28T17:28:07.984481Z", - "iopub.status.idle": "2024-08-28T17:28:08.048040Z", - "shell.execute_reply": "2024-08-28T17:28:08.047710Z" + "iopub.execute_input": "2024-08-29T20:53:48.829359Z", + "iopub.status.busy": "2024-08-29T20:53:48.829219Z", + "iopub.status.idle": "2024-08-29T20:53:48.899436Z", + "shell.execute_reply": "2024-08-29T20:53:48.899061Z" }, "tags": [] }, @@ -1715,7 +1715,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 29, @@ -1747,10 +1747,10 @@ "id": "0c6fde6b-1126-4625-9c6e-7223eb97c30b", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:08.049898Z", - "iopub.status.busy": "2024-08-28T17:28:08.049780Z", - "iopub.status.idle": "2024-08-28T17:28:08.052183Z", - "shell.execute_reply": "2024-08-28T17:28:08.051914Z" + "iopub.execute_input": "2024-08-29T20:53:48.903896Z", + "iopub.status.busy": "2024-08-29T20:53:48.903763Z", + "iopub.status.idle": "2024-08-29T20:53:48.906387Z", + "shell.execute_reply": "2024-08-29T20:53:48.906031Z" }, "tags": [] }, diff --git a/tests/notebooks/modin/TimeSeriesTesting.ipynb b/tests/notebooks/modin/TimeSeriesTesting.ipynb index b21dc046b66..e1d7ac54fec 100644 --- a/tests/notebooks/modin/TimeSeriesTesting.ipynb +++ b/tests/notebooks/modin/TimeSeriesTesting.ipynb @@ -3,9 +3,16 @@ { "cell_type": "markdown", "id": "143e5d4a-ca70-4ac8-a61e-be7c93c17d20", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "# Snowpark pandas Time series / date functionality" + "# Snowpark pandas Time series / date functionality\n", + "See https://pandas.pydata.org/docs/user_guide/timeseries.html as a reference." 
] }, { @@ -14,10 +21,10 @@ "id": "5ece8277-dc52-40f3-913f-1a3145df6bdc", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:35.535408Z", - "iopub.status.busy": "2024-08-28T17:27:35.535052Z", - "iopub.status.idle": "2024-08-28T17:27:36.951326Z", - "shell.execute_reply": "2024-08-28T17:27:36.950877Z" + "iopub.execute_input": "2024-08-29T21:12:38.130378Z", + "iopub.status.busy": "2024-08-29T21:12:38.130058Z", + "iopub.status.idle": "2024-08-29T21:12:39.788669Z", + "shell.execute_reply": "2024-08-29T21:12:39.787288Z" } }, "outputs": [], @@ -41,10 +48,10 @@ "id": "c127fb50-c570-46fb-a074-6e8eb3ede058", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:36.953723Z", - "iopub.status.busy": "2024-08-28T17:27:36.953532Z", - "iopub.status.idle": "2024-08-28T17:27:36.955730Z", - "shell.execute_reply": "2024-08-28T17:27:36.955323Z" + "iopub.execute_input": "2024-08-29T21:12:39.801080Z", + "iopub.status.busy": "2024-08-29T21:12:39.800293Z", + "iopub.status.idle": "2024-08-29T21:12:39.804335Z", + "shell.execute_reply": "2024-08-29T21:12:39.803709Z" } }, "outputs": [], @@ -56,9 +63,15 @@ { "cell_type": "markdown", "id": "02d7afa0-0224-4033-b2c7-465b67642201", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Parsing time series information from various sources and formats" + "##### Parsing time series information from various sources and formats" ] }, { @@ -66,22 +79,19 @@ "execution_count": 3, "id": "8d5f4a0a-fe5c-4a94-94ba-f16d258f92a6", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:36.958090Z", - "iopub.status.busy": "2024-08-28T17:27:36.957962Z", - "iopub.status.idle": "2024-08-28T17:27:37.441073Z", - "shell.execute_reply": "2024-08-28T17:27:37.440761Z" - } + "iopub.execute_input": "2024-08-29T21:12:39.808203Z", + "iopub.status.busy": "2024-08-29T21:12:39.807788Z", + "iopub.status.idle": "2024-08-29T21:12:40.958046Z", + "shell.execute_reply": "2024-08-29T21:12:40.957741Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`to_datetime` implementation has mismatches with pandas:\n", - "Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. 
In this case Snowflake's auto format may yield different result values compared to pandas..\n" - ] - }, { "data": { "text/plain": [ @@ -103,9 +113,15 @@ { "cell_type": "markdown", "id": "1df6a98f-a79b-4e90-b314-0cdf4f94fbfd", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Generate sequences of fixed-frequency dates and time spans" + "##### Generate sequences of fixed-frequency dates and time spans" ] }, { @@ -113,12 +129,17 @@ "execution_count": 4, "id": "28d01637-1093-43ea-a791-bc167243530e", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.443821Z", - "iopub.status.busy": "2024-08-28T17:27:37.443686Z", - "iopub.status.idle": "2024-08-28T17:27:37.973506Z", - "shell.execute_reply": "2024-08-28T17:27:37.973040Z" - } + "iopub.execute_input": "2024-08-29T21:12:40.960205Z", + "iopub.status.busy": "2024-08-29T21:12:40.959968Z", + "iopub.status.idle": "2024-08-29T21:12:42.085992Z", + "shell.execute_reply": "2024-08-29T21:12:42.084980Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -142,9 +163,15 @@ { "cell_type": "markdown", "id": "727ff6dd-9af1-4abd-a276-6042bf3b6878", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Manipulating and converting date times with timezone information" + "##### Manipulating and converting date times with timezone information" ] }, { @@ -152,17 +179,42 @@ "execution_count": 5, "id": "1f2c79bc-2a9e-41f0-ab36-44fcc20d119a", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.976832Z", - "iopub.status.busy": "2024-08-28T17:27:37.976629Z", - "iopub.status.idle": "2024-08-28T17:27:37.978940Z", - "shell.execute_reply": "2024-08-28T17:27:37.978566Z" - } + "iopub.execute_input": "2024-08-29T21:12:42.091191Z", + "iopub.status.busy": "2024-08-29T21:12:42.090881Z", + "iopub.status.idle": "2024-08-29T21:12:42.289597Z", + "shell.execute_reply": "2024-08-29T21:12:42.289228Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] }, - "outputs": [], + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DatetimeIndex.tz_localize", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# TODO SNOW-783178\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m dti \u001b[38;5;241m=\u001b[39m \u001b[43mdti\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz_localize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUTC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in 
https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DatetimeIndex.tz_localize" + ] + } + ], "source": [ - "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", - "#dti = dti.tz_localize(\"UTC\")" + "# TODO SNOW-783178\n", + "dti = dti.tz_localize(\"UTC\")" ] }, { @@ -170,25 +222,55 @@ "execution_count": 6, "id": "d7916dfa-9716-47e4-92a8-c3c852a3d802", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.981235Z", - "iopub.status.busy": "2024-08-28T17:27:37.981056Z", - "iopub.status.idle": "2024-08-28T17:27:37.983005Z", - "shell.execute_reply": "2024-08-28T17:27:37.982681Z" - } + "iopub.execute_input": "2024-08-29T21:12:42.295088Z", + "iopub.status.busy": "2024-08-29T21:12:42.294902Z", + "iopub.status.idle": "2024-08-29T21:12:42.321172Z", + "shell.execute_reply": "2024-08-29T21:12:42.320846Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] }, - "outputs": [], + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DatetimeIndex.tz_convert", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# TODO SNOW-1559264\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mdti\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz_convert\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUS/Pacific\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DatetimeIndex.tz_convert" + ] + } + ], "source": [ - "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", - "#dti.tz_convert(\"US/Pacific\")" + "# TODO SNOW-1559264\n", + "dti.tz_convert(\"US/Pacific\")" ] }, { "cell_type": "markdown", "id": "ed3c83b4-e048-4dbb-8d94-25cb0ee62e66", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Resampling or converting a time series to a particular frequency" + "##### Resampling or converting a time series to a particular frequency" ] }, { @@ -196,12 +278,17 @@ "execution_count": 7, "id": "5aa8cd79-521b-42ee-a3a6-66be36603bcb", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.985074Z", - "iopub.status.busy": "2024-08-28T17:27:37.984931Z", - "iopub.status.idle": "2024-08-28T17:27:39.127895Z", - "shell.execute_reply": "2024-08-28T17:27:39.127293Z" - } + "iopub.execute_input": "2024-08-29T21:12:42.326598Z", + "iopub.status.busy": "2024-08-29T21:12:42.326454Z", + "iopub.status.idle": "2024-08-29T21:12:44.537136Z", + "shell.execute_reply": "2024-08-29T21:12:44.536402Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -231,12 +318,17 @@ "execution_count": 8, "id": "796c954c-7f60-441b-b85e-1098824fae4b", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.131539Z", - "iopub.status.busy": "2024-08-28T17:27:39.131194Z", - "iopub.status.idle": "2024-08-28T17:27:39.966782Z", - "shell.execute_reply": "2024-08-28T17:27:39.966293Z" - } + "iopub.execute_input": "2024-08-29T21:12:44.542723Z", + "iopub.status.busy": "2024-08-29T21:12:44.542354Z", + "iopub.status.idle": "2024-08-29T21:12:46.752298Z", + "shell.execute_reply": "2024-08-29T21:12:46.751393Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -260,9 +352,15 @@ { "cell_type": "markdown", "id": "e873f69d-e8b5-423f-9f0e-b019d37e15df", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Performing date and time arithmetic with absolute or relative time increments" + "##### Performing date and time arithmetic with absolute or relative time increments" ] }, { @@ -270,12 +368,17 @@ "execution_count": 9, "id": "e7272da8-eae8-4e31-8a61-2f442a6780e0", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.969410Z", - "iopub.status.busy": "2024-08-28T17:27:39.969229Z", - "iopub.status.idle": "2024-08-28T17:27:39.972019Z", - "shell.execute_reply": "2024-08-28T17:27:39.971689Z" - } + "iopub.execute_input": "2024-08-29T21:12:46.757227Z", + "iopub.status.busy": "2024-08-29T21:12:46.756719Z", + "iopub.status.idle": "2024-08-29T21:12:46.761430Z", + "shell.execute_reply": "2024-08-29T21:12:46.760778Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -299,12 +402,17 @@ "execution_count": 10, "id": "b69cb16a-9fc7-46fe-a6f6-a6a1ce635dc5", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.974318Z", - "iopub.status.busy": "2024-08-28T17:27:39.974172Z", - "iopub.status.idle": "2024-08-28T17:27:39.976711Z", - "shell.execute_reply": "2024-08-28T17:27:39.976411Z" - } + "iopub.execute_input": 
"2024-08-29T21:12:46.764942Z", + "iopub.status.busy": "2024-08-29T21:12:46.764670Z", + "iopub.status.idle": "2024-08-29T21:12:46.769167Z", + "shell.execute_reply": "2024-08-29T21:12:46.768635Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -319,6 +427,7 @@ } ], "source": [ + "# Add 1 day\n", "saturday = friday + pd.Timedelta(\"1 day\")\n", "saturday.day_name()" ] @@ -328,12 +437,17 @@ "execution_count": 11, "id": "064f4271-9485-497d-b176-b39d4f75248c", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.979219Z", - "iopub.status.busy": "2024-08-28T17:27:39.979068Z", - "iopub.status.idle": "2024-08-28T17:27:39.981624Z", - "shell.execute_reply": "2024-08-28T17:27:39.981354Z" - } + "iopub.execute_input": "2024-08-29T21:12:46.773042Z", + "iopub.status.busy": "2024-08-29T21:12:46.772797Z", + "iopub.status.idle": "2024-08-29T21:12:46.776716Z", + "shell.execute_reply": "2024-08-29T21:12:46.776160Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -348,6 +462,7 @@ } ], "source": [ + "# Add 1 business day (Friday --> Monday)\n", "monday = friday + pd.offsets.BDay()\n", "monday.day_name()" ] @@ -357,12 +472,17 @@ "execution_count": 12, "id": "86bf9469-b7d3-44fc-900e-cfd67a065842", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.983587Z", - "iopub.status.busy": "2024-08-28T17:27:39.983443Z", - "iopub.status.idle": "2024-08-28T17:27:40.913877Z", - "shell.execute_reply": "2024-08-28T17:27:40.913560Z" - } + "iopub.execute_input": "2024-08-29T21:12:46.779514Z", + "iopub.status.busy": "2024-08-29T21:12:46.779333Z", + "iopub.status.idle": "2024-08-29T21:12:49.324239Z", + "shell.execute_reply": "2024-08-29T21:12:49.319893Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -388,39 +508,57 @@ }, { "cell_type": "markdown", - "id": "1a42453c-bea7-470a-be57-650f42fea9a5", - "metadata": {}, + "id": "922016ad-e1a7-4d42-a250-05f7ed7894d3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Time Series-related instance methods" + "# Overview" ] }, { "cell_type": "markdown", - "id": "86658f09-80b8-42a5-a108-efc4dddfdb09", - "metadata": {}, + "id": "dae7a72f-417b-471a-b353-5b8cab1b7585", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### From timestamps to epoch" + "##### For time series data, it’s conventional to represent the time component in the index of a Series or DataFrame so manipulations can be performed with respect to the time element." 
] }, { "cell_type": "code", "execution_count": 13, - "id": "0e4e6063-a60f-4a70-adca-2cb9b3b101f8", + "id": "e763faa7-7806-4d2d-8df7-46a7689af07f", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:40.916090Z", - "iopub.status.busy": "2024-08-28T17:27:40.915940Z", - "iopub.status.idle": "2024-08-28T17:27:41.359184Z", - "shell.execute_reply": "2024-08-28T17:27:41.358781Z" - } + "iopub.execute_input": "2024-08-29T21:12:49.331928Z", + "iopub.status.busy": "2024-08-29T21:12:49.331661Z", + "iopub.status.idle": "2024-08-29T21:12:50.839840Z", + "shell.execute_reply": "2024-08-29T21:12:50.838975Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',\n", - " '2012-10-10 18:15:05', '2012-10-11 18:15:05'],\n", - " dtype='datetime64[ns]', freq=None)" + "2000-01-01 0\n", + "2000-01-02 1\n", + "2000-01-03 2\n", + "Freq: None, dtype: int64" ] }, "execution_count": 13, @@ -429,206 +567,4746 @@ } ], "source": [ - "stamps = pd.date_range(\"2012-10-08 18:15:05\", periods=4, freq=\"D\")\n", - "stamps" + "pd.Series(range(3), index=pd.date_range(\"2000\", freq=\"D\", periods=3))" + ] + }, + { + "cell_type": "markdown", + "id": "61ec1d74-d3d2-4983-8497-2342c7462655", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### However, Series and DataFrame can directly also support the time component as data itself." ] }, { "cell_type": "code", "execution_count": 14, - "id": "f4a38e8b-abcb-49c6-839d-01e4215d7d7a", + "id": "585a116f-14e9-470e-a790-45b0a9de61fe", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.361646Z", - "iopub.status.busy": "2024-08-28T17:27:41.361457Z", - "iopub.status.idle": "2024-08-28T17:27:41.363485Z", - "shell.execute_reply": "2024-08-28T17:27:41.363091Z" - } + "iopub.execute_input": "2024-08-29T21:12:50.845071Z", + "iopub.status.busy": "2024-08-29T21:12:50.844702Z", + "iopub.status.idle": "2024-08-29T21:12:51.872836Z", + "shell.execute_reply": "2024-08-29T21:12:51.872201Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 2000-01-01\n", + "1 2000-01-02\n", + "2 2000-01-03\n", + "dtype: datetime64[ns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", - "# (stamps - pd.Timestamp(\"1970-01-01\")) // pd.Timedelta(\"1s\")" + "pd.Series(pd.date_range(\"2000\", freq=\"D\", periods=3))" ] }, { "cell_type": "markdown", - "id": "9b5b10c8-e72f-4405-bdeb-1bced14c8edf", - "metadata": {}, + "id": "01df1208-2b68-4ccf-b0c3-461ddc08d8b1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### DateOffset objects" + "##### Series and DataFrame have extended data type support and functionality for datetime, timedelta and Period data when passed into those constructors. DateOffset data however will be stored as object data." 
] }, { "cell_type": "code", "execution_count": 15, - "id": "1febbd6a-1b57-4e6a-a48a-3eac565ad61d", + "id": "6710ed2a-053e-459c-8752-acdf07d5e362", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:51.876484Z", + "iopub.status.busy": "2024-08-29T21:12:51.876122Z", + "iopub.status.idle": "2024-08-29T21:12:52.416975Z", + "shell.execute_reply": "2024-08-29T21:12:52.416640Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "pandas type period[M] is not implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:256\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mPANDAS_TO_SNOWFLAKE_MAP\u001b[49m\u001b[43m[\u001b[49m\u001b[43mp\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", + "\u001b[0;31mKeyError\u001b[0m: period[M]", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mperiod_range\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m1/1/2011\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:156\u001b[0m, in \u001b[0;36mSeries.__init__\u001b[0;34m(self, data, index, dtype, name, copy, fastpath, query_compiler)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28misinstance\u001b[39m(data, (pandas\u001b[38;5;241m.\u001b[39mSeries, pandas\u001b[38;5;241m.\u001b[39mIndex, pd\u001b[38;5;241m.\u001b[39mIndex))\n\u001b[1;32m 152\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m data\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 153\u001b[0m ):\n\u001b[1;32m 154\u001b[0m name \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m--> 156\u001b[0m query_compiler \u001b[38;5;241m=\u001b[39m \u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m_query_compiler\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_query_compiler \u001b[38;5;241m=\u001b[39m query_compiler\u001b[38;5;241m.\u001b[39mcolumnarize()\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/utils.py:104\u001b[0m, in \u001b[0;36mfrom_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msnowflake\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msnowpark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataFrame\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(query_compiler\u001b[38;5;241m=\u001b[39m\u001b[43mFactoryDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py:132\u001b[0m, in \u001b[0;36mFactoryDispatcher.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;129m@_inherit_docstrings\u001b[39m(factories\u001b[38;5;241m.\u001b[39mBaseFactory\u001b[38;5;241m.\u001b[39m_from_pandas)\n\u001b[1;32m 131\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_factory\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py:172\u001b[0m, in \u001b[0;36mBaseFactory._from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;129m@doc\u001b[39m(\n\u001b[1;32m 166\u001b[0m _doc_io_method_template,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_from_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/io/snow_io.py:177\u001b[0m, in \u001b[0;36mPandasOnSnowflakeIO.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df: pandas\u001b[38;5;241m.\u001b[39mDataFrame):\n\u001b[1;32m 173\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"invoke construction from pandas DataFrame (io backup methods), df is a pandas.DataFrame living in main-memory\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m df: An existing (native) pandas DataFrame\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_compiler_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:584\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_pandas\u001b[0;34m(cls, df, *args, **kwargs)\u001b[0m\n\u001b[1;32m 579\u001b[0m current_df_data_column_snowflake_quoted_identifiers\u001b[38;5;241m.\u001b[39mappend(\n\u001b[1;32m 580\u001b[0m row_position_snowflake_quoted_identifier\n\u001b[1;32m 581\u001b[0m )\n\u001b[1;32m 583\u001b[0m \u001b[38;5;66;03m# create snowpark df\u001b[39;00m\n\u001b[0;32m--> 584\u001b[0m snowpark_pandas_types, snowpark_types \u001b[38;5;241m=\u001b[39m \u001b[43minfer_snowpark_types_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 585\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m create_ordered_dataframe_from_pandas(\n\u001b[1;32m 586\u001b[0m df,\n\u001b[1;32m 587\u001b[0m snowflake_quoted_identifiers\u001b[38;5;241m=\u001b[39mcurrent_df_data_column_snowflake_quoted_identifiers,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 592\u001b[0m row_position_snowflake_quoted_identifier\u001b[38;5;241m=\u001b[39mrow_position_snowflake_quoted_identifier,\n\u001b[1;32m 593\u001b[0m )\n\u001b[1;32m 595\u001b[0m \u001b[38;5;66;03m# construct the internal frame for the dataframe\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1051\u001b[0m, in \u001b[0;36minfer_snowpark_types_from_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 1049\u001b[0m snowpark_types \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _, column \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m-> 1051\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43minfer_series_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(snowflake_type, SnowparkPandasType):\n\u001b[1;32m 1053\u001b[0m snowpark_types\u001b[38;5;241m.\u001b[39mappend(snowflake_type\u001b[38;5;241m.\u001b[39msnowpark_type)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:199\u001b[0m, in \u001b[0;36minfer_series_type\u001b[0;34m(series)\u001b[0m\n\u001b[1;32m 197\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m VariantType()\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 199\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43mTypeMapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_snowflake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m snowflake_type\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:258\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PANDAS_TO_SNOWFLAKE_MAP[p]\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m--> 258\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not implemented\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: pandas type period[M] is not implemented" + ] + } + ], + "source": [ + "pd.Series(pd.period_range(\"1/1/2011\", freq=\"M\", periods=3))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2ce1f555-cb1d-46e6-a0ee-831a90dbd594", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.365775Z", - "iopub.status.busy": "2024-08-28T17:27:41.365599Z", - "iopub.status.idle": "2024-08-28T17:27:41.413954Z", - "shell.execute_reply": "2024-08-28T17:27:41.413683Z" + "iopub.execute_input": "2024-08-29T21:12:52.421484Z", + "iopub.status.busy": "2024-08-29T21:12:52.421364Z", + "iopub.status.idle": "2024-08-29T21:12:52.657344Z", + "shell.execute_reply": "2024-08-29T21:12:52.656976Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Object of type DateOffset is not JSON serializable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDateOffset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDateOffset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:156\u001b[0m, in \u001b[0;36mSeries.__init__\u001b[0;34m(self, data, index, dtype, name, copy, fastpath, query_compiler)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28misinstance\u001b[39m(data, (pandas\u001b[38;5;241m.\u001b[39mSeries, pandas\u001b[38;5;241m.\u001b[39mIndex, pd\u001b[38;5;241m.\u001b[39mIndex))\n\u001b[1;32m 152\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m data\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 153\u001b[0m ):\n\u001b[1;32m 154\u001b[0m name \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m--> 156\u001b[0m 
query_compiler \u001b[38;5;241m=\u001b[39m \u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m_query_compiler\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_query_compiler \u001b[38;5;241m=\u001b[39m query_compiler\u001b[38;5;241m.\u001b[39mcolumnarize()\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/utils.py:104\u001b[0m, in \u001b[0;36mfrom_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msnowflake\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msnowpark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataFrame\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(query_compiler\u001b[38;5;241m=\u001b[39m\u001b[43mFactoryDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py:132\u001b[0m, in \u001b[0;36mFactoryDispatcher.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m 
\u001b[38;5;129m@_inherit_docstrings\u001b[39m(factories\u001b[38;5;241m.\u001b[39mBaseFactory\u001b[38;5;241m.\u001b[39m_from_pandas)\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_factory\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py:172\u001b[0m, in \u001b[0;36mBaseFactory._from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;129m@doc\u001b[39m(\n\u001b[1;32m 166\u001b[0m _doc_io_method_template,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_from_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/io/snow_io.py:177\u001b[0m, in \u001b[0;36mPandasOnSnowflakeIO.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df: pandas\u001b[38;5;241m.\u001b[39mDataFrame):\n\u001b[1;32m 173\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"invoke construction from pandas DataFrame (io backup methods), df is a pandas.DataFrame living in main-memory\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m df: An existing (native) pandas DataFrame\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_compiler_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:585\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_pandas\u001b[0;34m(cls, df, *args, **kwargs)\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[38;5;66;03m# create snowpark df\u001b[39;00m\n\u001b[1;32m 584\u001b[0m snowpark_pandas_types, snowpark_types \u001b[38;5;241m=\u001b[39m infer_snowpark_types_from_pandas(df)\n\u001b[0;32m--> 585\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_ordered_dataframe_from_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 586\u001b[0m \u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 587\u001b[0m \u001b[43m \u001b[49m\u001b[43msnowflake_quoted_identifiers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurrent_df_data_column_snowflake_quoted_identifiers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 588\u001b[0m \u001b[43m \u001b[49m\u001b[43msnowpark_types\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msnowpark_types\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mordering_columns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mOrderingColumn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrow_position_snowflake_quoted_identifier\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mrow_position_snowflake_quoted_identifier\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrow_position_snowflake_quoted_identifier\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[38;5;66;03m# construct the internal frame for the dataframe\u001b[39;00m\n\u001b[1;32m 596\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(\n\u001b[1;32m 597\u001b[0m InternalFrame\u001b[38;5;241m.\u001b[39mcreate(\n\u001b[1;32m 598\u001b[0m ordered_dataframe\u001b[38;5;241m=\u001b[39mordered_dataframe,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 617\u001b[0m )\n\u001b[1;32m 618\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1114\u001b[0m, in \u001b[0;36mcreate_ordered_dataframe_from_pandas\u001b[0;34m(df, snowflake_quoted_identifiers, snowpark_types, ordering_columns, row_position_snowflake_quoted_identifier)\u001b[0m\n\u001b[1;32m 1103\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m y \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(data[x])):\n\u001b[1;32m 1104\u001b[0m data[x][y] \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1105\u001b[0m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1106\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m isna_data[x][y]\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1111\u001b[0m )\n\u001b[1;32m 1112\u001b[0m )\n\u001b[0;32m-> 1114\u001b[0m snowpark_df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_dataframe\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1115\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1116\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStructType\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1117\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 1118\u001b[0m \u001b[43m \u001b[49m\u001b[43mStructField\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumn_identifier\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatatype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meach_datatype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1119\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meach_datatype\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mzip\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1120\u001b[0m \u001b[43m \u001b[49m\u001b[43msnowflake_quoted_identifiers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msnowpark_types\u001b[49m\n\u001b[1;32m 1121\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1122\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1124\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OrderedDataFrame(\n\u001b[1;32m 1126\u001b[0m DataFrameReference(snowpark_df, snowflake_quoted_identifiers),\n\u001b[1;32m 1127\u001b[0m projected_column_snowflake_quoted_identifiers\u001b[38;5;241m=\u001b[39msnowflake_quoted_identifiers,\n\u001b[1;32m 1128\u001b[0m ordering_columns\u001b[38;5;241m=\u001b[39mordering_columns,\n\u001b[1;32m 1129\u001b[0m row_position_snowflake_quoted_identifier\u001b[38;5;241m=\u001b[39mrow_position_snowflake_quoted_identifier,\n\u001b[1;32m 1130\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/session.py:2807\u001b[0m, in \u001b[0;36mSession.create_dataframe\u001b[0;34m(self, data, schema)\u001b[0m\n\u001b[1;32m 2805\u001b[0m converted_row\u001b[38;5;241m.\u001b[39mappend(json\u001b[38;5;241m.\u001b[39mdumps(value, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m=\u001b[39mPythonObjJSONEncoder))\n\u001b[1;32m 2806\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_type, VariantType):\n\u001b[0;32m-> 2807\u001b[0m converted_row\u001b[38;5;241m.\u001b[39mappend(\u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPythonObjJSONEncoder\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 2808\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_type, GeographyType):\n\u001b[1;32m 2809\u001b[0m converted_row\u001b[38;5;241m.\u001b[39mappend(value)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/__init__.py:234\u001b[0m, in \u001b[0;36mdumps\u001b[0;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[0;32m--> 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/encoder.py:199\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[0;34m(self, o)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m encode_basestring(o)\n\u001b[1;32m 196\u001b[0m \u001b[38;5;66;03m# This doesn't pass the iterator directly to ''.join() because the\u001b[39;00m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;66;03m# exceptions aren't as detailed. 
The list call should be roughly\u001b[39;00m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# equivalent to the PySequence_Fast that ''.join() would do.\u001b[39;00m\n\u001b[0;32m--> 199\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miterencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_one_shot\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)):\n\u001b[1;32m 201\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(chunks)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/encoder.py:257\u001b[0m, in \u001b[0;36mJSONEncoder.iterencode\u001b[0;34m(self, o, _one_shot)\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 253\u001b[0m _iterencode \u001b[38;5;241m=\u001b[39m _make_iterencode(\n\u001b[1;32m 254\u001b[0m markers, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault, _encoder, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindent, floatstr,\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkey_separator, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mitem_separator, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msort_keys,\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mskipkeys, _one_shot)\n\u001b[0;32m--> 257\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_iterencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/utils.py:651\u001b[0m, in \u001b[0;36mPythonObjJSONEncoder.default\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 649\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m value\u001b[38;5;241m.\u001b[39mtolist()\n\u001b[1;32m 650\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 651\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/encoder.py:179\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[0;34m(self, o)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, o):\n\u001b[1;32m 161\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124;03m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124;03m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 177\u001b[0m \n\u001b[1;32m 178\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mObject of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mo\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis not JSON serializable\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[0;31mTypeError\u001b[0m: Object of type DateOffset is not JSON serializable" + ] + } + ], + "source": [ + "pd.Series([pd.DateOffset(1), pd.DateOffset(2)])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a6befac8-06ca-45c2-ade8-03736ac7f7ed", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:52.663945Z", + "iopub.status.busy": "2024-08-29T21:12:52.663811Z", + "iopub.status.idle": "2024-08-29T21:12:53.689964Z", + "shell.execute_reply": "2024-08-29T21:12:53.689137Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')" + "0   2011-01-31\n", + "1   2011-02-28\n", + "2   2011-03-31\n", + "dtype: datetime64[ns]" ] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ts = pd.Timestamp(\"2016-10-30 00:00:00\", tz=\"Europe/Helsinki\")\n", - "\n", - "ts + pd.Timedelta(days=1)" + "pd.Series(pd.date_range(\"1/1/2011\", freq=\"ME\", periods=3))" ] }, { "cell_type": "markdown", "id": "a433d9b6-cfa8-4a1a-bc65-7253a0ab1fc1", "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ + "##### Lastly, pandas represents null date times, time deltas, and time spans as NaT, which is useful for representing missing or null date-like values and behaves similarly to np.nan for float data."
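To illustrate the np.nan analogy, a short native-pandas sketch (not part of this notebook run; the semantics should carry over wherever Snowpark pandas supports the operation): NaT propagates through datetime arithmetic and is detected with isna() rather than equality, since NaT != NaT, as the cells below demonstrate.

import pandas as native_pd  # native pandas, shown only for NaT semantics

ts = native_pd.Timestamp("2000-01-01")
print(ts + native_pd.Timedelta(native_pd.NaT))  # NaT: missing values propagate through arithmetic
print(native_pd.isna(native_pd.NaT))            # True: detect NaT with isna(), not ==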
] }, { "cell_type": "code", - "execution_count": 16, - "id": "b8d03fb0-826f-4698-a6d0-f2b63f7d38dc", + "execution_count": 18, + "id": "cf9b85c2-c342-492a-8bef-d8dad541c8d2", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.415634Z", - "iopub.status.busy": "2024-08-28T17:27:41.415523Z", - "iopub.status.idle": "2024-08-28T17:27:41.417793Z", - "shell.execute_reply": "2024-08-28T17:27:41.417561Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.694436Z", + "iopub.status.busy": "2024-08-29T21:12:53.694067Z", + "iopub.status.idle": "2024-08-29T21:12:53.699310Z", + "shell.execute_reply": "2024-08-29T21:12:53.698757Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')" + "NaT" ] }, - "execution_count": 16, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ts + pd.DateOffset(days=1)" + "pd.Timestamp(pd.NaT)" ] }, { - "cell_type": "markdown", - "id": "00d1f5cf-c073-4c60-949b-404953b80000", - "metadata": {}, + "cell_type": "code", + "execution_count": 19, + "id": "28c9907e-eaef-44c1-8a96-2e4a8b0451dc", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:53.704104Z", + "iopub.status.busy": "2024-08-29T21:12:53.703792Z", + "iopub.status.idle": "2024-08-29T21:12:53.707678Z", + "shell.execute_reply": "2024-08-29T21:12:53.707062Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "NaT" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "### Timestamp Binary Operations" + "pd.Timedelta(pd.NaT)" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "dd818a8d-97c1-46f3-b29f-499ba92f22ae", + "execution_count": 20, + "id": "7ba5293a-b234-4a4a-a276-f5b9ebc6512c", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.419736Z", - "iopub.status.busy": "2024-08-28T17:27:41.419620Z", - "iopub.status.idle": "2024-08-28T17:27:42.097520Z", - "shell.execute_reply": "2024-08-28T17:27:42.097100Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.711155Z", + "iopub.status.busy": "2024-08-29T21:12:53.710940Z", + "iopub.status.idle": "2024-08-29T21:12:53.714655Z", + "shell.execute_reply": "2024-08-29T21:12:53.713901Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timedelta('396 days 03:00:00')" + "NaT" ] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.to_datetime('2018-10-26 12:00:00') - pd.to_datetime('2017-09-25 09:00:00')" + "pd.Period(pd.NaT)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "7c9a87d2-7883-46a6-8433-dfa5900ca9b0", + "execution_count": 21, + "id": "41ff9cef-5957-45ac-b9d6-994ffb813cf3", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:42.099835Z", - "iopub.status.busy": "2024-08-28T17:27:42.099657Z", - "iopub.status.idle": "2024-08-28T17:27:42.102502Z", - "shell.execute_reply": "2024-08-28T17:27:42.102144Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.718345Z", + "iopub.status.busy": "2024-08-29T21:12:53.718130Z", + "iopub.status.idle": "2024-08-29T21:12:53.721214Z", + "shell.execute_reply": "2024-08-29T21:12:53.720794Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { 
"data": { "text/plain": [ - "Timedelta('6 days 07:00:00')" + "False" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.Timestamp(\"2014-08-01 10:00\") - pd.Timestamp(\"2014-07-26 03:00\")" + "# Equality acts as np.nan would\n", + "pd.NaT == pd.NaT" + ] + }, + { + "cell_type": "markdown", + "id": "55cdacb3-46ca-465e-869a-f8887852e401", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Timestamps vs. time spans" + ] + }, + { + "cell_type": "markdown", + "id": "52c2704d-375b-42ff-9b9c-e48883d3b43f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Timestamp and Period can serve as an index. Lists of Timestamp and Period are automatically coerced to DatetimeIndex and PeriodIndex respectively." ] }, { "cell_type": "code", - "execution_count": 19, - "id": "e78454b1-0d4c-42bc-a127-b21a4a7f09cf", + "execution_count": 22, + "id": "2b248408-76d5-4d95-9dc5-164ba9532309", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:42.104922Z", - "iopub.status.busy": "2024-08-28T17:27:42.104781Z", - "iopub.status.idle": "2024-08-28T17:27:42.107600Z", - "shell.execute_reply": "2024-08-28T17:27:42.107293Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.724462Z", + "iopub.status.busy": "2024-08-29T21:12:53.724294Z", + "iopub.status.idle": "2024-08-29T21:12:54.022295Z", + "shell.execute_reply": "2024-08-29T21:12:54.020998Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timedelta('682 days 03:00:00')" + "snowflake.snowpark.modin.plugin.extensions.datetime_index.DatetimeIndex" ] }, - "execution_count": 19, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.Timestamp(year=2017, month=1, day=1, hour=12) - pd.Timestamp(year=2015, month=2, day=19, hour=9)" + "dates = [\n", + " pd.Timestamp(\"2012-05-01\"),\n", + " pd.Timestamp(\"2012-05-02\"),\n", + " pd.Timestamp(\"2012-05-03\"),\n", + "]\n", + "\n", + "\n", + "ts = pd.Series(np.random.randn(3), dates)\n", + "\n", + "type(ts.index)" ] }, { "cell_type": "code", - "execution_count": 20, - "id": "2534d141-1862-4901-ba70-7ed73ab9abdd", + "execution_count": 23, + "id": "c7cca402-6c5f-475a-aab7-77957f656690", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:42.109761Z", - "iopub.status.busy": "2024-08-28T17:27:42.109628Z", - "iopub.status.idle": "2024-08-28T17:27:42.763799Z", - "shell.execute_reply": "2024-08-28T17:27:42.763158Z" - } + "iopub.execute_input": "2024-08-29T21:12:54.033215Z", + "iopub.status.busy": "2024-08-29T21:12:54.032640Z", + "iopub.status.idle": "2024-08-29T21:12:54.427276Z", + "shell.execute_reply": "2024-08-29T21:12:54.426196Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timedelta('-31 days +03:09:02')" + "DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)" ] }, - "execution_count": 20, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts.index" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b88e7085-4cfd-456d-8422-f440ba02ed27", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:54.432281Z", + "iopub.status.busy": 
"2024-08-29T21:12:54.431935Z", + "iopub.status.idle": "2024-08-29T21:12:54.880207Z", + "shell.execute_reply": "2024-08-29T21:12:54.879027Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2012-05-01 0.806339\n", + "2012-05-02 0.151004\n", + "2012-05-03 0.198380\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "126e9c1c-0006-44a7-a05d-a8ccb0aca029", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:54.886147Z", + "iopub.status.busy": "2024-08-29T21:12:54.885761Z", + "iopub.status.idle": "2024-08-29T21:12:54.952853Z", + "shell.execute_reply": "2024-08-29T21:12:54.952527Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "pandas type period[M] is not implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:256\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mPANDAS_TO_SNOWFLAKE_MAP\u001b[49m\u001b[43m[\u001b[49m\u001b[43mp\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", + "\u001b[0;31mKeyError\u001b[0m: period[M]", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[25], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m periods \u001b[38;5;241m=\u001b[39m [pd\u001b[38;5;241m.\u001b[39mPeriod(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01\u001b[39m\u001b[38;5;124m\"\u001b[39m), pd\u001b[38;5;241m.\u001b[39mPeriod(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-02\u001b[39m\u001b[38;5;124m\"\u001b[39m), pd\u001b[38;5;241m.\u001b[39mPeriod(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-03\u001b[39m\u001b[38;5;124m\"\u001b[39m)]\n\u001b[0;32m----> 3\u001b[0m ts \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mtype\u001b[39m(ts\u001b[38;5;241m.\u001b[39mindex)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:156\u001b[0m, in \u001b[0;36mSeries.__init__\u001b[0;34m(self, data, index, dtype, name, copy, fastpath, query_compiler)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28misinstance\u001b[39m(data, 
(pandas\u001b[38;5;241m.\u001b[39mSeries, pandas\u001b[38;5;241m.\u001b[39mIndex, pd\u001b[38;5;241m.\u001b[39mIndex))\n\u001b[1;32m 152\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m data\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 153\u001b[0m ):\n\u001b[1;32m 154\u001b[0m name \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m--> 156\u001b[0m query_compiler \u001b[38;5;241m=\u001b[39m \u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m_query_compiler\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_query_compiler \u001b[38;5;241m=\u001b[39m query_compiler\u001b[38;5;241m.\u001b[39mcolumnarize()\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/utils.py:104\u001b[0m, in \u001b[0;36mfrom_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msnowflake\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msnowpark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataFrame\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
DataFrame(query_compiler\u001b[38;5;241m=\u001b[39m\u001b[43mFactoryDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py:132\u001b[0m, in \u001b[0;36mFactoryDispatcher.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;129m@_inherit_docstrings\u001b[39m(factories\u001b[38;5;241m.\u001b[39mBaseFactory\u001b[38;5;241m.\u001b[39m_from_pandas)\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_factory\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py:172\u001b[0m, in \u001b[0;36mBaseFactory._from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;129m@doc\u001b[39m(\n\u001b[1;32m 166\u001b[0m _doc_io_method_template,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_from_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/io/snow_io.py:177\u001b[0m, in \u001b[0;36mPandasOnSnowflakeIO.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df: pandas\u001b[38;5;241m.\u001b[39mDataFrame):\n\u001b[1;32m 173\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"invoke construction from pandas DataFrame (io backup methods), df is a pandas.DataFrame living in main-memory\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m df: An existing (native) pandas DataFrame\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_compiler_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:584\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_pandas\u001b[0;34m(cls, df, *args, **kwargs)\u001b[0m\n\u001b[1;32m 579\u001b[0m current_df_data_column_snowflake_quoted_identifiers\u001b[38;5;241m.\u001b[39mappend(\n\u001b[1;32m 580\u001b[0m row_position_snowflake_quoted_identifier\n\u001b[1;32m 581\u001b[0m )\n\u001b[1;32m 583\u001b[0m \u001b[38;5;66;03m# create snowpark df\u001b[39;00m\n\u001b[0;32m--> 584\u001b[0m snowpark_pandas_types, snowpark_types \u001b[38;5;241m=\u001b[39m \u001b[43minfer_snowpark_types_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 585\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m create_ordered_dataframe_from_pandas(\n\u001b[1;32m 586\u001b[0m df,\n\u001b[1;32m 587\u001b[0m snowflake_quoted_identifiers\u001b[38;5;241m=\u001b[39mcurrent_df_data_column_snowflake_quoted_identifiers,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 592\u001b[0m row_position_snowflake_quoted_identifier\u001b[38;5;241m=\u001b[39mrow_position_snowflake_quoted_identifier,\n\u001b[1;32m 593\u001b[0m )\n\u001b[1;32m 595\u001b[0m \u001b[38;5;66;03m# construct the internal frame for the dataframe\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1051\u001b[0m, in \u001b[0;36minfer_snowpark_types_from_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 1049\u001b[0m snowpark_types \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _, column \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m-> 1051\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43minfer_series_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(snowflake_type, SnowparkPandasType):\n\u001b[1;32m 1053\u001b[0m 
snowpark_types\u001b[38;5;241m.\u001b[39mappend(snowflake_type\u001b[38;5;241m.\u001b[39msnowpark_type)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:199\u001b[0m, in \u001b[0;36minfer_series_type\u001b[0;34m(series)\u001b[0m\n\u001b[1;32m 197\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m VariantType()\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 199\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43mTypeMapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_snowflake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m snowflake_type\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:258\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PANDAS_TO_SNOWFLAKE_MAP[p]\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m--> 258\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not implemented\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: pandas type period[M] is not implemented" + ] + } + ], + "source": [ + "periods = [pd.Period(\"2012-01\"), pd.Period(\"2012-02\"), pd.Period(\"2012-03\")]\n", + "\n", + "ts = pd.Series(np.random.randn(3), periods)\n", + "\n", + "type(ts.index)" + ] + }, + { + "cell_type": "markdown", + "id": "f927521e-535f-4d26-8a4c-d8fda7319000", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Converting to timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "bad4e888-44cd-4c49-8c21-be7590a8ae3f", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:54.957350Z", + "iopub.status.busy": "2024-08-29T21:12:54.957223Z", + "iopub.status.idle": "2024-08-29T21:12:56.239436Z", + "shell.execute_reply": "2024-08-29T21:12:56.239161Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation may have mismatches with pandas:\n", + "Snowflake automatic format detection is used when a format is not provided.In this case Snowflake's auto format may yield different result values compared to pandas.See https://docs.snowflake.com/en/sql-reference/date-time-input-output#supported-formats-for-auto-detection for details.\n" + ] + }, + { + "ename": "SnowparkSQLException", + "evalue": "(1304): 01b6ae78-0d07-1a71-0002-990387e5d42b: 100035 (22007): Timestamp 'Jul 31, 2009' is not recognized", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mSnowparkSQLException\u001b[0m Traceback (most recent call last)", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/IPython/core/formatters.py:708\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 701\u001b[0m stream \u001b[38;5;241m=\u001b[39m StringIO()\n\u001b[1;32m 702\u001b[0m printer \u001b[38;5;241m=\u001b[39m pretty\u001b[38;5;241m.\u001b[39mRepresentationPrinter(stream, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose,\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_width, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnewline,\n\u001b[1;32m 704\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_seq_length,\n\u001b[1;32m 705\u001b[0m singleton_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msingleton_printers,\n\u001b[1;32m 706\u001b[0m type_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtype_printers,\n\u001b[1;32m 707\u001b[0m deferred_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeferred_printers)\n\u001b[0;32m--> 708\u001b[0m \u001b[43mprinter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpretty\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 709\u001b[0m printer\u001b[38;5;241m.\u001b[39mflush()\n\u001b[1;32m 710\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m stream\u001b[38;5;241m.\u001b[39mgetvalue()\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/IPython/lib/pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m meth(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mobject\u001b[39m \\\n\u001b[1;32m 409\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__repr__\u001b[39m\u001b[38;5;124m'\u001b[39m)):\n\u001b[0;32m--> 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_repr_pprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcycle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_pprint(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[1;32m 413\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/IPython/lib/pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[1;32m 777\u001b[0m \u001b[38;5;66;03m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[0;32m--> 778\u001b[0m output \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 779\u001b[0m lines \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39msplitlines()\n\u001b[1;32m 780\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m p\u001b[38;5;241m.\u001b[39mgroup():\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m 
\u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:477\u001b[0m, in \u001b[0;36mSeries.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 470\u001b[0m num_rows \u001b[38;5;241m=\u001b[39m pandas\u001b[38;5;241m.\u001b[39mget_option(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisplay.max_rows\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m60\u001b[39m\n\u001b[1;32m 471\u001b[0m num_cols \u001b[38;5;241m=\u001b[39m pandas\u001b[38;5;241m.\u001b[39mget_option(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisplay.max_columns\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m20\u001b[39m\n\u001b[1;32m 473\u001b[0m (\n\u001b[1;32m 474\u001b[0m row_count,\n\u001b[1;32m 475\u001b[0m col_count,\n\u001b[1;32m 476\u001b[0m temp_df,\n\u001b[0;32m--> 477\u001b[0m ) \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query_compiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild_repr_df\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_rows\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cols\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(temp_df, pandas\u001b[38;5;241m.\u001b[39mDataFrame) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m temp_df\u001b[38;5;241m.\u001b[39mempty:\n\u001b[1;32m 479\u001b[0m temp_df \u001b[38;5;241m=\u001b[39m temp_df\u001b[38;5;241m.\u001b[39miloc[:, \u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:11699\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.build_repr_df\u001b[0;34m(self, num_rows_to_display, num_cols_to_display, times_symbol)\u001b[0m\n\u001b[1;32m 11697\u001b[0m \u001b[38;5;66;03m# retrieve frame as pandas object\u001b[39;00m\n\u001b[1;32m 11698\u001b[0m new_qc \u001b[38;5;241m=\u001b[39m SnowflakeQueryCompiler(new_frame)\n\u001b[0;32m> 11699\u001b[0m pandas_frame \u001b[38;5;241m=\u001b[39m \u001b[43mnew_qc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11701\u001b[0m \u001b[38;5;66;03m# remove last column after first retrieving row count\u001b[39;00m\n\u001b[1;32m 11702\u001b[0m row_count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mlen\u001b[39m(pandas_frame) \u001b[38;5;28;01melse\u001b[39;00m pandas_frame\u001b[38;5;241m.\u001b[39miat[\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, 
**kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:754\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.to_pandas\u001b[0;34m(self, statement_params, **kwargs)\u001b[0m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_pandas\u001b[39m(\n\u001b[1;32m 738\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 739\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[1;32m 740\u001b[0m statement_params: Optional[\u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 741\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 742\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m native_pd\u001b[38;5;241m.\u001b[39mDataFrame:\n\u001b[1;32m 743\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 744\u001b[0m \u001b[38;5;124;03m Convert underlying query compilers data to ``pandas.DataFrame``.\u001b[39;00m\n\u001b[1;32m 745\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 752\u001b[0m \n\u001b[1;32m 753\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 754\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_modin_frame\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/frame.py:895\u001b[0m, in \u001b[0;36mInternalFrame.to_pandas\u001b[0;34m(self, statement_params, **kwargs)\u001b[0m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;124;03mConvert this InternalFrame to ``pandas.DataFrame``.\u001b[39;00m\n\u001b[1;32m 882\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[38;5;124;03m The InternalFrame converted to pandas.\u001b[39;00m\n\u001b[1;32m 889\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 890\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mordered_dataframe\u001b[38;5;241m.\u001b[39mselect(\n\u001b[1;32m 891\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex_column_snowflake_quoted_identifiers\n\u001b[1;32m 892\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_column_snowflake_quoted_identifiers\n\u001b[1;32m 893\u001b[0m )\n\u001b[0;32m--> 895\u001b[0m native_df \u001b[38;5;241m=\u001b[39m \u001b[43msnowpark_to_pandas_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 896\u001b[0m \u001b[43m \u001b[49m\u001b[43mordered_dataframe\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 897\u001b[0m \u001b[43m \u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 898\u001b[0m \u001b[43m \u001b[49m\u001b[43mcached_snowpark_pandas_types\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcached_index_column_snowpark_pandas_types\u001b[49m\n\u001b[1;32m 899\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcached_data_column_snowpark_pandas_types\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 900\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 901\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 903\u001b[0m \u001b[38;5;66;03m# to_pandas() does not preserve the index information and will just return a\u001b[39;00m\n\u001b[1;32m 904\u001b[0m \u001b[38;5;66;03m# RangeIndex. 
Therefore, we need to set the index column manually\u001b[39;00m\n\u001b[1;32m 905\u001b[0m native_df\u001b[38;5;241m.\u001b[39mset_index(\n\u001b[1;32m 906\u001b[0m [\n\u001b[1;32m 907\u001b[0m extract_pandas_label_from_snowflake_quoted_identifier(identifier)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 910\u001b[0m inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 911\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1334\u001b[0m, in \u001b[0;36msnowpark_to_pandas_helper\u001b[0;34m(ordered_dataframe, cached_snowpark_pandas_types, statement_params, **kwargs)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[38;5;66;03m# ensure that snowpark_df has unique identifiers, so the native pandas DataFrame object created here\u001b[39;00m\n\u001b[1;32m 1330\u001b[0m \u001b[38;5;66;03m# also does have unique column names which is a prerequisite for the post-processing logic following.\u001b[39;00m\n\u001b[1;32m 1331\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m is_duplicate_free(\n\u001b[1;32m 1332\u001b[0m column_identifiers \u001b[38;5;241m+\u001b[39m variant_type_typeof_identifiers\n\u001b[1;32m 1333\u001b[0m ), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSnowpark DataFrame to convert must have unique column identifiers\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1334\u001b[0m pandas_df \u001b[38;5;241m=\u001b[39m \u001b[43mordered_dataframe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1336\u001b[0m \u001b[38;5;66;03m# Step 3: perform post-processing\u001b[39;00m\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;66;03m# If the dataframe has no rows, do not perform this. 
Using the result of the `apply` on\u001b[39;00m\n\u001b[1;32m 1338\u001b[0m \u001b[38;5;66;03m# an empty frame would erroneously update the dtype of the column to be `float64` instead of `object`.\u001b[39;00m\n\u001b[1;32m 1339\u001b[0m \u001b[38;5;66;03m# TODO SNOW-982779: verify correctness of this behavior\u001b[39;00m\n\u001b[1;32m 1340\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pandas_df\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py:1786\u001b[0m, in \u001b[0;36mOrderedDataFrame.to_pandas\u001b[0;34m(self, statement_params, block, **kwargs)\u001b[0m\n\u001b[1;32m 1784\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1785\u001b[0m statement_params\u001b[38;5;241m.\u001b[39mupdate(get_default_snowpark_pandas_statement_params())\n\u001b[0;32m-> 1786\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msnowpark_dataframe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1787\u001b[0m \u001b[43m \u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 1788\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/telemetry.py:153\u001b[0m, in \u001b[0;36mdf_collect_api_telemetry..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_session\u001b[38;5;241m.\u001b[39mquery_history() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 153\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 154\u001b[0m plan \u001b[38;5;241m=\u001b[39m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_select_statement \u001b[38;5;129;01mor\u001b[39;00m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_plan\n\u001b[1;32m 155\u001b[0m api_calls \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 156\u001b[0m \u001b[38;5;241m*\u001b[39mplan\u001b[38;5;241m.\u001b[39mapi_calls,\n\u001b[1;32m 157\u001b[0m {TelemetryField\u001b[38;5;241m.\u001b[39mNAME\u001b[38;5;241m.\u001b[39mvalue: \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDataFrame.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m},\n\u001b[1;32m 158\u001b[0m 
]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/dataframe.py:806\u001b[0m, in \u001b[0;36mDataFrame.to_pandas\u001b[0;34m(self, statement_params, block, **kwargs)\u001b[0m\n\u001b[1;32m 787\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 788\u001b[0m \u001b[38;5;124;03mExecutes the query representing this DataFrame and returns the result as a\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[38;5;124;03m`pandas DataFrame `_.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 803\u001b[0m \u001b[38;5;124;03m :func:`Session.sql` can only be a SELECT statement.\u001b[39;00m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m open_telemetry_context_manager(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_pandas, \u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 806\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_session\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_conn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 807\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_plan\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_pandas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 809\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 810\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_AsyncResultType\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPANDAS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 811\u001b[0m \u001b[43m \u001b[49m\u001b[43m_statement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcreate_or_update_statement_params_with_query_tag\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 812\u001b[0m \u001b[43m \u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_statement_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 813\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_session\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_tag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 814\u001b[0m \u001b[43m \u001b[49m\u001b[43mSKIP_LEVELS_TWO\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 815\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 816\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 817\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 819\u001b[0m \u001b[38;5;66;03m# if the returned result is not a pandas dataframe, raise Exception\u001b[39;00m\n\u001b[1;32m 820\u001b[0m \u001b[38;5;66;03m# this might happen when calling this method with non-select commands\u001b[39;00m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;66;03m# e.g., session.sql(\"create ...\").to_pandas()\u001b[39;00m\n\u001b[1;32m 822\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m block:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:526\u001b[0m, in \u001b[0;36mServerConnection.execute\u001b[0;34m(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs)\u001b[0m\n\u001b[1;32m 516\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 517\u001b[0m is_in_stored_procedure()\n\u001b[1;32m 518\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m block\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 521\u001b[0m )\n\u001b[1;32m 522\u001b[0m ): \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 523\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 524\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAsync query is not supported in stored procedure yet\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 525\u001b[0m )\n\u001b[0;32m--> 526\u001b[0m result_set, result_meta \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_result_set\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 527\u001b[0m \u001b[43m \u001b[49m\u001b[43mplan\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 528\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_pandas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 529\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 530\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 531\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 532\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 533\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_on_exception\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_on_exception\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 534\u001b[0m \u001b[43m \u001b[49m\u001b[43mcase_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcase_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 535\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m block:\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result_set\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:207\u001b[0m, in \u001b[0;36mSnowflakePlan.Decorator.wrap_exception..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 204\u001b[0m ne \u001b[38;5;241m=\u001b[39m SnowparkClientExceptionMessages\u001b[38;5;241m.\u001b[39mSQL_EXCEPTION_FROM_PROGRAMMING_ERROR(\n\u001b[1;32m 205\u001b[0m e\n\u001b[1;32m 206\u001b[0m )\n\u001b[0;32m--> 207\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ne\u001b[38;5;241m.\u001b[39mwith_traceback(tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:138\u001b[0m, in 
\u001b[0;36mSnowflakePlan.Decorator.wrap_exception..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 138\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m snowflake\u001b[38;5;241m.\u001b[39mconnector\u001b[38;5;241m.\u001b[39merrors\u001b[38;5;241m.\u001b[39mProgrammingError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 140\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(e, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:630\u001b[0m, in \u001b[0;36mServerConnection.get_result_set\u001b[0;34m(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, ignore_results, **kwargs)\u001b[0m\n\u001b[1;32m 628\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m holder, id_ \u001b[38;5;129;01min\u001b[39;00m placeholders\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 629\u001b[0m final_query \u001b[38;5;241m=\u001b[39m final_query\u001b[38;5;241m.\u001b[39mreplace(holder, id_)\n\u001b[0;32m--> 630\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 631\u001b[0m \u001b[43m \u001b[49m\u001b[43mfinal_query\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_pandas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 633\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_iter\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmain_queries\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 634\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_ddl_on_temp_object\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mis_ddl_on_temp_object\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 635\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mis_last\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 636\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 637\u001b[0m \u001b[43m \u001b[49m\u001b[43masync_job_plan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mplan\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 638\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlog_on_exception\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_on_exception\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 639\u001b[0m \u001b[43m \u001b[49m\u001b[43mcase_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcase_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 640\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_results\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 642\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 643\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[0;31mSnowparkSQLException\u001b[0m: (1304): 01b6ae78-0d07-1a71-0002-990387e5d42b: 100035 (22007): Timestamp 'Jul 31, 2009' is not recognized" + ] + } + ], + "source": [ + "pd.to_datetime(pd.Series([\"Jul 31, 2009\", \"Jan 10, 2010\", None]))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "bc0d555d-e2af-42fe-96af-024274ce001e", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:56.273155Z", + "iopub.status.busy": "2024-08-29T21:12:56.273001Z", + "iopub.status.idle": "2024-08-29T21:12:57.106508Z", + "shell.execute_reply": "2024-08-29T21:12:57.105822Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([\"2005/11/23\", \"2010/12/31\"])" + ] + }, + { + "cell_type": "markdown", + "id": 
"12126dc9-6fad-4667-a3ce-82ccb0ff54df", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### If you use dates which start with the day first (i.e. European style), you can pass the dayfirst flag:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "88419653-e387-4cb7-a25a-428dccd767a3", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:57.112766Z", + "iopub.status.busy": "2024-08-29T21:12:57.112379Z", + "iopub.status.idle": "2024-08-29T21:12:57.808940Z", + "shell.execute_reply": "2024-08-29T21:12:57.802786Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([\"04-01-2012 10:00\"], dayfirst=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e5f75293-3b7f-47ba-9348-9b71aa6ea774", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:57.827247Z", + "iopub.status.busy": "2024-08-29T21:12:57.826810Z", + "iopub.status.idle": "2024-08-29T21:12:58.529092Z", + "shell.execute_reply": "2024-08-29T21:12:58.528007Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "UserWarning: Parsing dates in %m-%d-%Y %H:%M format when dayfirst=True was specified. Pass `dayfirst=False` or specify a format to silence this warning.\n" + ] + }, + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-04-14 10:00:00'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([\"04-14-2012 10:00\"], dayfirst=True)" + ] + }, + { + "cell_type": "markdown", + "id": "901c62d1-5db8-47d7-b32a-f3e911f02036", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### You can also use the DatetimeIndex constructor directly:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "5d8cfe2e-b991-4036-acdf-a6b4da02f1c0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:58.534583Z", + "iopub.status.busy": "2024-08-29T21:12:58.534156Z", + "iopub.status.idle": "2024-08-29T21:12:59.059427Z", + "shell.execute_reply": "2024-08-29T21:12:59.057882Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DatetimeIndex([\"2018-01-01\", \"2018-01-03\", \"2018-01-05\"])" + ] + }, + { + "cell_type": "markdown", + "id": "b8ebb2fc-173c-406a-ade8-d807b1073b2b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### The string ‘infer’ can be passed in order to set the frequency of the index as the inferred frequency upon creation:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f06dcd55-ebf6-4c33-b299-8c8f7fde94b3", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.066203Z", + 
"iopub.status.busy": "2024-08-29T21:12:59.065831Z", + "iopub.status.idle": "2024-08-29T21:12:59.601774Z", + "shell.execute_reply": "2024-08-29T21:12:59.600584Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DatetimeIndex([\"2018-01-01\", \"2018-01-03\", \"2018-01-05\"], freq=\"infer\")" + ] + }, + { + "cell_type": "markdown", + "id": "82fd0285-3c12-4429-a10e-d505192fe2a1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Providing a format argument" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "bce3c8c4-bfed-432c-a6f1-1b18151843bc", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.608185Z", + "iopub.status.busy": "2024-08-29T21:12:59.607833Z", + "iopub.status.idle": "2024-08-29T21:12:59.613136Z", + "shell.execute_reply": "2024-08-29T21:12:59.612608Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2010-11-12 00:00:00')" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\"2010/11/12\", format=\"%Y/%m/%d\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "6adee443-0929-49a7-ab1f-d3f3a2d7d164", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.618039Z", + "iopub.status.busy": "2024-08-29T21:12:59.617739Z", + "iopub.status.idle": "2024-08-29T21:12:59.622386Z", + "shell.execute_reply": "2024-08-29T21:12:59.621865Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2010-11-12 00:00:00')" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\"12-11-2010 00:00\", format=\"%d-%m-%Y %H:%M\")" + ] + }, + { + "cell_type": "markdown", + "id": "4484371f-ef3a-4e7f-a16a-83dbb0a74741", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Assembling datetime from multiple DataFrame columns" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "00230009-1790-48ea-b58b-d68e162b1d8d", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.626836Z", + "iopub.status.busy": "2024-08-29T21:12:59.626477Z", + "iopub.status.idle": "2024-08-29T21:13:00.302444Z", + "shell.execute_reply": "2024-08-29T21:13:00.301244Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2015-02-04 02:00:00\n", + "1 2016-03-05 03:00:00\n", + "dtype: datetime64[ns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(\n", + " {\"year\": [2015, 2016], \"month\": [2, 3], \"day\": [4, 5], \"hour\": [2, 3]}\n", + ")\n", + "\n", + "\n", + "pd.to_datetime(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "d1481451-a63f-4f1d-8c58-b570ad099bca", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:00.313705Z", + "iopub.status.busy": 
"2024-08-29T21:13:00.313078Z", + "iopub.status.idle": "2024-08-29T21:13:00.856785Z", + "shell.execute_reply": "2024-08-29T21:13:00.856191Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2015-02-04\n", + "1 2016-03-05\n", + "dtype: datetime64[ns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(df[[\"year\", \"month\", \"day\"]])" + ] + }, + { + "cell_type": "markdown", + "id": "d0f4fd47-3a16-4b5a-99ca-ef698e8188d1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Invalid data" + ] + }, + { + "cell_type": "markdown", + "id": "273d5d66-590f-4d88-a3c6-2623827ed8f1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### The default behavior, errors='raise', is to raise when unparsable:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "3a90e705-7028-4604-b1c8-ad37d9ad804c", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:00.866862Z", + "iopub.status.busy": "2024-08-29T21:13:00.866601Z", + "iopub.status.idle": "2024-08-29T21:13:01.222078Z", + "shell.execute_reply": "2024-08-29T21:13:01.221762Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "time data \"asd\" doesn't match format \"%Y/%m/%d\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. 
You might want to use `dayfirst` alongside this.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;31mValueError\u001b[0m: time data \"asd\" doesn't match format \"%Y/%m/%d\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this."
+ ] + } + ], + "source": [ + "pd.to_datetime(['2009/07/31', 'asd'], errors='raise')" + ] + }, + { + "cell_type": "markdown", + "id": "71a6317f-66eb-4689-a15f-61a03b2c93c4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Pass errors='coerce' to convert unparsable data to NaT (not a time):" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "c15076fb-8bf0-47b3-9178-e745b27b0eae", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:01.233855Z", + "iopub.status.busy": "2024-08-29T21:13:01.233738Z", + "iopub.status.idle": "2024-08-29T21:13:01.904070Z", + "shell.execute_reply": "2024-08-29T21:13:01.903200Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(['2009/07/31', 'asd'], errors='coerce')" + ] + }, + { + "cell_type": "markdown", + "id": "ea663c7c-13a2-458f-85cd-999116b6b7ed", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Epoch timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "018f285f-ad0a-454c-abe4-7ee1fcf7acfb", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:01.909194Z", + "iopub.status.busy": "2024-08-29T21:13:01.908859Z", + "iopub.status.idle": "2024-08-29T21:13:02.578918Z", + "shell.execute_reply": "2024-08-29T21:13:02.578486Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',\n", + " '2012-10-10 18:15:05', '2012-10-11 18:15:05',\n", + " '2012-10-12 18:15:05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\n", + " [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit=\"s\"\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7f15dd29-958a-4279-a14a-75851710cf65", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:02.581162Z", + "iopub.status.busy": "2024-08-29T21:13:02.580986Z", + "iopub.status.idle": "2024-08-29T21:13:03.340376Z", + "shell.execute_reply": "2024-08-29T21:13:03.339624Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['1970-01-16 14:55:20.105000', '1970-01-16 14:56:46.505000',\n", + " '1970-01-16 14:58:12.905000', '1970-01-16 14:59:39.305000',\n", + " '1970-01-16 15:01:05.705000'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\n", + " [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit=\"ms\"\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f796ad57-6bce-4930-9217-5da5b9d37dc0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Constructing a Timestamp or DatetimeIndex with an epoch timestamp with the tz argument specified will raise a ValueError. 
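\n\nA short sketch of the distinction (hypothetical epoch value; this mirrors the native pandas semantics that Snowpark pandas relies on for these scalar conversions): an epoch names an absolute instant, so convert it first and only then view it in a zone.\n\n```python\nts = pd.to_datetime(1262347200, unit=\"s\")       # naive instant: 2010-01-01 12:00:00 (UTC)\nts.tz_localize(\"UTC\").tz_convert(\"US/Pacific\")  # same instant, 2010-01-01 04:00:00-08:00\n```\n\n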
If you have epochs in wall time in another timezone, you can read the epochs as timezone-naive timestamps and then localize to the appropriate timezone:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f565054c-8ecf-47f3-a601-181d1b66cb59", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:03.345596Z", + "iopub.status.busy": "2024-08-29T21:13:03.345274Z", + "iopub.status.idle": "2024-08-29T21:13:03.411334Z", + "shell.execute_reply": "2024-08-29T21:13:03.411013Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2010-01-01 12:00:00-0800', tz='US/Pacific')" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Timestamp(1262347200000000000).tz_localize(\"US/Pacific\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "06b3d9cc-9a51-4039-a9a7-84c66d4221a8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:03.413022Z", + "iopub.status.busy": "2024-08-29T21:13:03.412902Z", + "iopub.status.idle": "2024-08-29T21:13:03.588840Z", + "shell.execute_reply": "2024-08-29T21:13:03.588516Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DatetimeIndex.tz_localize", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DatetimeIndex.tz_localize" + ] + } + ], + "source": [ + "pd.DatetimeIndex([1262347200000000000]).tz_localize(\"US/Pacific\")" + ] + }, + { + "cell_type": "markdown", + "id": "86658f09-80b8-42a5-a108-efc4dddfdb09", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### From timestamps to epoch" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "0e4e6063-a60f-4a70-adca-2cb9b3b101f8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:03.595180Z", + "iopub.status.busy": "2024-08-29T21:13:03.595035Z", + "iopub.status.idle": "2024-08-29T21:13:04.620415Z", + "shell.execute_reply": "2024-08-29T21:13:04.619552Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',\n", + " '2012-10-10 18:15:05', '2012-10-11 18:15:05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stamps = pd.date_range(\"2012-10-08 18:15:05\", periods=4, freq=\"D\")\n", + "stamps" + ] + }, + { + "cell_type": "markdown", + "id": "720ebf04-b3d2-45c7-9393-3b064c3cf6d1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### We subtract the epoch (midnight at January 1, 1970 UTC) and then floor divide by the “unit” (1 second)." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "f4a38e8b-abcb-49c6-839d-01e4215d7d7a", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:04.624393Z", + "iopub.status.busy": "2024-08-29T21:13:04.624099Z", + "iopub.status.idle": "2024-08-29T21:13:05.994009Z", + "shell.execute_reply": "2024-08-29T21:13:05.989798Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(stamps - pd.Timestamp(\"1970-01-01\")) // pd.Timedelta(\"1s\")" + ] + }, + { + "cell_type": "markdown", + "id": "09d2b860-df2d-4069-b4b0-2dd1a8697faf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Using the origin parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "099f77b7-7a79-4dcd-bc37-552616cbb4c0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:06.006249Z", + "iopub.status.busy": "2024-08-29T21:13:06.004156Z", + "iopub.status.idle": "2024-08-29T21:13:06.696681Z", + "shell.execute_reply": "2024-08-29T21:13:06.695745Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([1, 2, 3], unit=\"D\", origin=pd.Timestamp(\"1960-01-01\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": 
"e1878fc8-b2f3-4c93-b118-6d185ea19ce5", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:06.701885Z", + "iopub.status.busy": "2024-08-29T21:13:06.701537Z", + "iopub.status.idle": "2024-08-29T21:13:07.424582Z", + "shell.execute_reply": "2024-08-29T21:13:07.422794Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([1, 2, 3], unit=\"D\")" + ] + }, + { + "cell_type": "markdown", + "id": "6415bc76-3599-4c37-ab86-2b841bac3b95", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Generating ranges of timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "e349e6b7-fdda-4879-b469-9719d2b74be0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:07.429375Z", + "iopub.status.busy": "2024-08-29T21:13:07.428977Z", + "iopub.status.idle": "2024-08-29T21:13:07.946936Z", + "shell.execute_reply": "2024-08-29T21:13:07.946186Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dates = [\n", + " datetime.datetime(2012, 5, 1),\n", + " datetime.datetime(2012, 5, 2),\n", + " datetime.datetime(2012, 5, 3),\n", + "]\n", + "\n", + "\n", + "# Note the frequency information\n", + "index = pd.DatetimeIndex(dates)\n", + "\n", + "index" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "7c293a9c-a307-4bf3-9414-e5db0abc5b55", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:07.952090Z", + "iopub.status.busy": "2024-08-29T21:13:07.951757Z", + "iopub.status.idle": "2024-08-29T21:13:08.656604Z", + "shell.execute_reply": "2024-08-29T21:13:08.655201Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Automatically converted to DatetimeIndex\n", + "index = pd.Index(dates)\n", + "\n", + "index" + ] + }, + { + "cell_type": "markdown", + "id": "864f9e1c-969d-489e-8e3e-aab762e4680a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular frequency, we can use the date_range() and bdate_range() functions to create a DatetimeIndex. 
The default frequency for date_range is a calendar day while the default for bdate_range is a business day:" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "9576a971-d970-4fd0-95fe-46738536c6d3", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:08.663459Z", + "iopub.status.busy": "2024-08-29T21:13:08.663082Z", + "iopub.status.idle": "2024-08-29T21:13:09.867935Z", + "shell.execute_reply": "2024-08-29T21:13:09.866893Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',\n", + " '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',\n", + " '2011-01-09', '2011-01-10',\n", + " ...\n", + " '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',\n", + " '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',\n", + " '2011-12-31', '2012-01-01'],\n", + " dtype='datetime64[ns]', length=366, freq=None)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start = datetime.datetime(2011, 1, 1)\n", + "\n", + "end = datetime.datetime(2012, 1, 1)\n", + "\n", + "index = pd.date_range(start, end)\n", + "\n", + "index" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c2661f9d-f500-4886-b199-159593a9d23c", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:09.884058Z", + "iopub.status.busy": "2024-08-29T21:13:09.883330Z", + "iopub.status.idle": "2024-08-29T21:13:09.894222Z", + "shell.execute_reply": "2024-08-29T21:13:09.893806Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',\n", + " '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',\n", + " '2011-01-13', '2011-01-14',\n", + " ...\n", + " '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',\n", + " '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',\n", + " '2011-12-29', '2011-12-30'],\n", + " dtype='datetime64[ns]', length=260, freq='B')" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index = pd.bdate_range(start, end)\n", + "\n", + "index" + ] + }, + { + "cell_type": "markdown", + "id": "ff77a6bc-8197-4e48-b8e7-f182a3f6e742", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Convenience functions like date_range and bdate_range can utilize a variety of frequency aliases:" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "2d80a37d-b58e-4eee-a8e6-8cef919547cc", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:09.897578Z", + "iopub.status.busy": "2024-08-29T21:13:09.897407Z", + "iopub.status.idle": "2024-08-29T21:13:10.957551Z", + "shell.execute_reply": "2024-08-29T21:13:10.956700Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',\n", + " '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',\n", + " '2011-09-30', '2011-10-31',\n", + " ...\n", + " '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',\n", + " '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',\n", + " '2094-03-31', '2094-04-30'],\n", + " dtype='datetime64[ns]', length=1000, 
freq=None)" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(start, periods=1000, freq=\"ME\")" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "b2d8e0eb-b332-4ad6-a503-6d92d774c65a", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:10.962554Z", + "iopub.status.busy": "2024-08-29T21:13:10.962185Z", + "iopub.status.idle": "2024-08-29T21:13:10.969894Z", + "shell.execute_reply": "2024-08-29T21:13:10.969391Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-04-01', '2011-07-01', '2011-10-03',\n", + " '2012-01-02', '2012-04-02', '2012-07-02', '2012-10-01',\n", + " '2013-01-01', '2013-04-01',\n", + " ...\n", + " '2071-01-01', '2071-04-01', '2071-07-01', '2071-10-01',\n", + " '2072-01-01', '2072-04-01', '2072-07-01', '2072-10-03',\n", + " '2073-01-02', '2073-04-03'],\n", + " dtype='datetime64[ns]', length=250, freq='BQS-JAN')" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.bdate_range(start, periods=250, freq=\"BQS\")" + ] + }, + { + "cell_type": "markdown", + "id": "1a42453c-bea7-470a-be57-650f42fea9a5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### date_range and bdate_range make it easy to generate a range of dates using various combinations of parameters like start, end, periods, and freq. The start and end dates are strictly inclusive, so dates outside of those specified will not be generated:" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "9a30b89e-2a20-488f-a3a4-6106c20a6b3b", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:10.974736Z", + "iopub.status.busy": "2024-08-29T21:13:10.974462Z", + "iopub.status.idle": "2024-08-29T21:13:11.083073Z", + "shell.execute_reply": "2024-08-29T21:13:11.082758Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "offset BME is not implemented in Snowpark pandas API", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[52], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mBME\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/general.py:2405\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 2401\u001b[0m \u001b[38;5;66;03m# If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints.\u001b[39;00m\n\u001b[1;32m 2402\u001b[0m \u001b[38;5;66;03m# If more than one of these inputs provides a timezone, require that they all agree.\u001b[39;00m\n\u001b[1;32m 2403\u001b[0m tz \u001b[38;5;241m=\u001b[39m _infer_tz_from_endpoints(start, end, tz)\n\u001b[0;32m-> 2405\u001b[0m qc \u001b[38;5;241m=\u001b[39m \u001b[43mSnowflakeQueryCompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_date_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2406\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2407\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2408\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2409\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2410\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2411\u001b[0m \u001b[43m \u001b[49m\u001b[43mleft_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2412\u001b[0m \u001b[43m \u001b[49m\u001b[43mright_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2413\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m \u001b[38;5;66;03m# Set date range as index column.\u001b[39;00m\n\u001b[1;32m 2415\u001b[0m qc \u001b[38;5;241m=\u001b[39m 
qc\u001b[38;5;241m.\u001b[39mset_index_from_columns(qc\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mtolist(), include_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:691\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_date_range\u001b[0;34m(cls, start, end, periods, freq, tz, left_inclusive, right_inclusive)\u001b[0m\n\u001b[1;32m 689\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m ns_values\u001b[38;5;241m.\u001b[39mseries_to_datetime()\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 691\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_irregular_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\n\u001b[1;32m 693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 694\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 695\u001b[0m \u001b[38;5;66;03m# Create a linearly spaced date_range in local time\u001b[39;00m\n\u001b[1;32m 696\u001b[0m \u001b[38;5;66;03m# This is the original pandas source code:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 701\u001b[0m \u001b[38;5;66;03m# Here we implement it similarly as np.linspace\u001b[39;00m\n\u001b[1;32m 702\u001b[0m div \u001b[38;5;241m=\u001b[39m periods \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;66;03m# type: ignore[operator]\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:216\u001b[0m, in \u001b[0;36mgenerate_irregular_range\u001b[0;34m(start, end, periods, 
offset)\u001b[0m\n\u001b[1;32m 213\u001b[0m periods \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 215\u001b[0m num_offsets \u001b[38;5;241m=\u001b[39m get_active_session()\u001b[38;5;241m.\u001b[39mrange(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, end\u001b[38;5;241m=\u001b[39mperiods, step\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 216\u001b[0m sf_date_or_time_part \u001b[38;5;241m=\u001b[39m \u001b[43m_offset_name_to_sf_date_or_time_part\u001b[49m\u001b[43m(\u001b[49m\u001b[43moffset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m dt_col \u001b[38;5;241m=\u001b[39m builtin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDATEADD\u001b[39m\u001b[38;5;124m\"\u001b[39m)(\n\u001b[1;32m 218\u001b[0m sf_date_or_time_part,\n\u001b[1;32m 219\u001b[0m offset\u001b[38;5;241m.\u001b[39mn \u001b[38;5;241m*\u001b[39m col(num_offsets\u001b[38;5;241m.\u001b[39mcolumns[\u001b[38;5;241m0\u001b[39m]),\n\u001b[1;32m 220\u001b[0m pandas_lit(start),\n\u001b[1;32m 221\u001b[0m )\n\u001b[1;32m 222\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m offset\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01min\u001b[39;00m LAST_DAY:\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# When last day is required, we need to explicitly call LAST_DAY SQL function to convert DATEADD results to the\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;66;03m# last day, e.g., adding one month to \"2/29/2024\" using DATEADD results \"3/29/2024\", which is not the last day\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# of March. So we need to call LAST_DAY. Also, LAST_DAY only return the date, then we need to reconstruct the\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# timestamp using timestamp_ntz_from_parts\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:162\u001b[0m, in \u001b[0;36m_offset_name_to_sf_date_or_time_part\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP:\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP[name]\n\u001b[0;32m--> 162\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moffset \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not implemented in Snowpark pandas API\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: offset BME is not implemented in Snowpark pandas API" + ] + } + ], + "source": [ + "pd.date_range(start, end, freq=\"BME\")" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "cb8484ed-fbd1-4331-a87a-4382e2eb4a64", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:11.097524Z", + "iopub.status.busy": "2024-08-29T21:13:11.097382Z", + "iopub.status.idle": "2024-08-29T21:13:12.139910Z", + "shell.execute_reply": "2024-08-29T21:13:12.138779Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',\n", + " '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',\n", + " '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',\n", + " '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',\n", + " '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',\n", + " '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',\n", + " '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',\n", + " '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',\n", + " '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',\n", + " '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',\n", + " '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',\n", + " '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',\n", + " '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',\n", + " '2012-01-01'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(start, end, freq=\"W\")" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "ed7a8ede-b4ac-40b7-a9ca-0523b8b2d908", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:12.143926Z", + "iopub.status.busy": "2024-08-29T21:13:12.143641Z", + "iopub.status.idle": "2024-08-29T21:13:12.150593Z", + "shell.execute_reply": "2024-08-29T21:13:12.149956Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-12-05', '2011-12-06', '2011-12-07', '2011-12-08',\n", + " '2011-12-09', '2011-12-12', '2011-12-13', '2011-12-14',\n", + " '2011-12-15', '2011-12-16', '2011-12-19', '2011-12-20',\n", + " '2011-12-21', '2011-12-22', '2011-12-23', '2011-12-26',\n", + " '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30'],\n", + " dtype='datetime64[ns]', freq='B')" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.bdate_range(end=end, periods=20)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "6ce60028-87c2-4d03-aca4-79d74c713b1d", + "metadata": { + "editable": true, + "execution": { + 
"iopub.execute_input": "2024-08-29T21:13:12.154722Z", + "iopub.status.busy": "2024-08-29T21:13:12.154470Z", + "iopub.status.idle": "2024-08-29T21:13:12.159125Z", + "shell.execute_reply": "2024-08-29T21:13:12.158630Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',\n", + " '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',\n", + " '2011-01-13', '2011-01-14', '2011-01-17', '2011-01-18',\n", + " '2011-01-19', '2011-01-20', '2011-01-21', '2011-01-24',\n", + " '2011-01-25', '2011-01-26', '2011-01-27', '2011-01-28'],\n", + " dtype='datetime64[ns]', freq='B')" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.bdate_range(start=start, periods=20)" + ] + }, + { + "cell_type": "markdown", + "id": "b540e12a-5c2f-4f61-a074-a6a872680e42", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Specifying start, end, and periods will generate a range of evenly spaced dates from start to end inclusively, with periods number of elements in the resulting DatetimeIndex:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "1470b8ca-270e-4701-bca0-751485a21d3e", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:12.162192Z", + "iopub.status.busy": "2024-08-29T21:13:12.162058Z", + "iopub.status.idle": "2024-08-29T21:13:13.160310Z", + "shell.execute_reply": "2024-08-29T21:13:13.158836Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',\n", + " '2018-01-05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(\"2018-01-01\", \"2018-01-05\", periods=5)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "2a0022ee-ba87-4c2b-8c41-16d9271cebe7", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:13.166386Z", + "iopub.status.busy": "2024-08-29T21:13:13.166138Z", + "iopub.status.idle": "2024-08-29T21:13:14.168199Z", + "shell.execute_reply": "2024-08-29T21:13:14.166045Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 10:40:00',\n", + " '2018-01-01 21:20:00', '2018-01-02 08:00:00',\n", + " '2018-01-02 18:40:00', '2018-01-03 05:20:00',\n", + " '2018-01-03 16:00:00', '2018-01-04 02:40:00',\n", + " '2018-01-04 13:20:00', '2018-01-05 00:00:00'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(\"2018-01-01\", \"2018-01-05\", periods=10)" + ] + }, + { + "cell_type": "markdown", + "id": "1960e1c1-1488-45af-9ac8-ba6ce9fe5d4e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Custom frequency ranges" + ] + }, + { + "cell_type": "markdown", + "id": "2d4f1cd9-4aad-4c44-98c1-65234836a294", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### bdate_range can also generate a range of custom frequency dates by using the 
weekmask and holidays parameters. These parameters will only be used if a custom frequency string is passed.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "2a322ecb-3c8d-443b-9ad0-a9a3b5ca68d5", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:14.175290Z", + "iopub.status.busy": "2024-08-29T21:13:14.174969Z", + "iopub.status.idle": "2024-08-29T21:13:14.183465Z", + "shell.execute_reply": "2024-08-29T21:13:14.182892Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-01-07', '2011-01-10', '2011-01-12',\n", + " '2011-01-14', '2011-01-17', '2011-01-19', '2011-01-21',\n", + " '2011-01-24', '2011-01-26',\n", + " ...\n", + " '2011-12-09', '2011-12-12', '2011-12-14', '2011-12-16',\n", + " '2011-12-19', '2011-12-21', '2011-12-23', '2011-12-26',\n", + " '2011-12-28', '2011-12-30'],\n", + " dtype='datetime64[ns]', length=154, freq='C')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weekmask = \"Mon Wed Fri\"\n", + "\n", + "holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)]\n", + "\n", + "pd.bdate_range(start, end, freq=\"C\", weekmask=weekmask, holidays=holidays)" + ] + }, + { + "cell_type": "markdown", + "id": "efb29c5a-0e24-4e0f-b535-68905f6739a4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "cc530503-75c4-4aed-b8c4-b589609dc868", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:14.188084Z", + "iopub.status.busy": "2024-08-29T21:13:14.187833Z", + "iopub.status.idle": "2024-08-29T21:13:14.251094Z", + "shell.execute_reply": "2024-08-29T21:13:14.250539Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "offset BME is not implemented in Snowpark pandas API", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[59], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m rng \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mBME\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m ts \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mSeries(np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;28mlen\u001b[39m(rng)), index\u001b[38;5;241m=\u001b[39mrng)\n\u001b[1;32m 5\u001b[0m ts\u001b[38;5;241m.\u001b[39mindex\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m 
\u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking 
`query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/general.py:2405\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 2401\u001b[0m \u001b[38;5;66;03m# If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints.\u001b[39;00m\n\u001b[1;32m 2402\u001b[0m \u001b[38;5;66;03m# If more than one of these inputs provides a timezone, require that they all agree.\u001b[39;00m\n\u001b[1;32m 2403\u001b[0m tz \u001b[38;5;241m=\u001b[39m _infer_tz_from_endpoints(start, end, tz)\n\u001b[0;32m-> 2405\u001b[0m qc \u001b[38;5;241m=\u001b[39m \u001b[43mSnowflakeQueryCompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_date_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2406\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2407\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2408\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2409\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2410\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2411\u001b[0m \u001b[43m \u001b[49m\u001b[43mleft_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2412\u001b[0m \u001b[43m \u001b[49m\u001b[43mright_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2413\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m 
\u001b[38;5;66;03m# Set date range as index column.\u001b[39;00m\n\u001b[1;32m 2415\u001b[0m qc \u001b[38;5;241m=\u001b[39m qc\u001b[38;5;241m.\u001b[39mset_index_from_columns(qc\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mtolist(), include_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:691\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_date_range\u001b[0;34m(cls, start, end, periods, freq, tz, left_inclusive, right_inclusive)\u001b[0m\n\u001b[1;32m 689\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m ns_values\u001b[38;5;241m.\u001b[39mseries_to_datetime()\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 691\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_irregular_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\n\u001b[1;32m 693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 694\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 695\u001b[0m \u001b[38;5;66;03m# Create a linearly spaced date_range in local time\u001b[39;00m\n\u001b[1;32m 696\u001b[0m \u001b[38;5;66;03m# This is the original pandas source code:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 701\u001b[0m \u001b[38;5;66;03m# Here we implement it similarly as np.linspace\u001b[39;00m\n\u001b[1;32m 702\u001b[0m div \u001b[38;5;241m=\u001b[39m periods \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;66;03m# type: ignore[operator]\u001b[39;00m\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:216\u001b[0m, in \u001b[0;36mgenerate_irregular_range\u001b[0;34m(start, end, periods, offset)\u001b[0m\n\u001b[1;32m 213\u001b[0m periods \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 215\u001b[0m num_offsets \u001b[38;5;241m=\u001b[39m get_active_session()\u001b[38;5;241m.\u001b[39mrange(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, end\u001b[38;5;241m=\u001b[39mperiods, step\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 216\u001b[0m sf_date_or_time_part \u001b[38;5;241m=\u001b[39m \u001b[43m_offset_name_to_sf_date_or_time_part\u001b[49m\u001b[43m(\u001b[49m\u001b[43moffset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m dt_col \u001b[38;5;241m=\u001b[39m builtin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDATEADD\u001b[39m\u001b[38;5;124m\"\u001b[39m)(\n\u001b[1;32m 218\u001b[0m sf_date_or_time_part,\n\u001b[1;32m 219\u001b[0m offset\u001b[38;5;241m.\u001b[39mn \u001b[38;5;241m*\u001b[39m col(num_offsets\u001b[38;5;241m.\u001b[39mcolumns[\u001b[38;5;241m0\u001b[39m]),\n\u001b[1;32m 220\u001b[0m pandas_lit(start),\n\u001b[1;32m 221\u001b[0m )\n\u001b[1;32m 222\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m offset\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01min\u001b[39;00m LAST_DAY:\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# When last day is required, we need to explicitly call LAST_DAY SQL function to convert DATEADD results to the\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;66;03m# last day, e.g., adding one month to \"2/29/2024\" using DATEADD results \"3/29/2024\", which is not the last day\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# of March. So we need to call LAST_DAY. 
Also, LAST_DAY only return the date, then we need to reconstruct the\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# timestamp using timestamp_ntz_from_parts\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:162\u001b[0m, in \u001b[0;36m_offset_name_to_sf_date_or_time_part\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP:\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP[name]\n\u001b[0;32m--> 162\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moffset \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not implemented in Snowpark pandas API\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: offset BME is not implemented in Snowpark pandas API" + ] + } + ], + "source": [ + "rng = pd.date_range(start, end, freq=\"BME\")\n", + "\n", + "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", + "\n", + "ts.index" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "4e5da203-33d9-4c93-a14e-2d89a2510cf9", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:14.266591Z", + "iopub.status.busy": "2024-08-29T21:13:14.266382Z", + "iopub.status.idle": "2024-08-29T21:13:16.534061Z", + "shell.execute_reply": "2024-08-29T21:13:16.532702Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',\n", + " '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',\n", + " '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rng = 
pd.date_range(start, end, freq=\"ME\")\n", + "\n", + "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", + "\n", + "ts.index" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "fd70b5db-c29c-44dd-9dbf-860dd4099ef8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:16.540912Z", + "iopub.status.busy": "2024-08-29T21:13:16.540419Z", + "iopub.status.idle": "2024-08-29T21:13:17.091472Z", + "shell.execute_reply": "2024-08-29T21:13:17.088560Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',\n", + " '2011-05-31'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[:5].index" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "5f1a58ce-ca6e-4cfd-8956-ec4910a9f862", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:17.096716Z", + "iopub.status.busy": "2024-08-29T21:13:17.096346Z", + "iopub.status.idle": "2024-08-29T21:13:17.662846Z", + "shell.execute_reply": "2024-08-29T21:13:17.662393Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-31',\n", + " '2011-09-30', '2011-11-30'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[::2].index" + ] + }, + { + "cell_type": "markdown", + "id": "4e303f4f-010b-447e-9c0c-ade4b3f0d250", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### To provide convenience for accessing longer time series, you can also pass in the year or year and month as strings:" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "93ca9d24-72e0-46ef-b844-ae04c53ffea4", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:17.665508Z", + "iopub.status.busy": "2024-08-29T21:13:17.665292Z", + "iopub.status.idle": "2024-08-29T21:13:18.492864Z", + "shell.execute_reply": "2024-08-29T21:13:18.492061Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-01-31 -0.953229\n", + "2011-02-28 0.928557\n", + "2011-03-31 -0.444832\n", + "2011-04-30 3.278272\n", + "2011-05-31 0.545594\n", + "2011-06-30 -1.074684\n", + "2011-07-31 -1.505286\n", + "2011-08-31 0.112716\n", + "2011-09-30 1.525040\n", + "2011-10-31 0.438627\n", + "2011-11-30 -1.456351\n", + "2011-12-31 2.020059\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[\"2011\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "b6a434c7-7865-45d6-86ba-2afeef7216c2", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:18.498185Z", + "iopub.status.busy": "2024-08-29T21:13:18.497811Z", + "iopub.status.idle": "2024-08-29T21:13:19.319099Z", + "shell.execute_reply": "2024-08-29T21:13:19.317369Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-06-30 -1.074684\n", + "Freq: None, dtype: float64" + ] + }, + 
"execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[\"2011-6\"]" + ] + }, + { + "cell_type": "markdown", + "id": "885cf000-838e-4287-9870-2470afcb7fc8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This type of slicing will work on a DataFrame with a DatetimeIndex as well. Since the partial string selection is a form of label slicing, the endpoints will be included. This would include matching times on an included date:" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "3254e5cd-cc94-479a-9c30-b679de04b870", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:19.329816Z", + "iopub.status.busy": "2024-08-29T21:13:19.329457Z", + "iopub.status.idle": "2024-08-29T21:13:31.791447Z", + "shell.execute_reply": "2024-08-29T21:13:31.790649Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-03-11 10:35:00 -0.337715\n", + "2013-03-11 10:36:00 -2.028548\n", + "2013-03-11 10:37:00 0.726182\n", + "2013-03-11 10:38:00 -1.167831\n", + "2013-03-11 10:39:00 -1.285208\n", + "\n", + "[100000 rows x 1 columns]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.seed(0)\n", + "\n", + "dft = pd.DataFrame(\n", + " np.random.randn(100000, 1),\n", + " columns=[\"A\"],\n", + " index=pd.date_range(\"20130101\", periods=100000, freq=\"min\"),\n", + ")\n", + "\n", + "\n", + "dft" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "4aa1a291-f13e-4b35-a73b-333411df51de", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:31.797703Z", + "iopub.status.busy": "2024-08-29T21:13:31.797160Z", + "iopub.status.idle": "2024-08-29T21:13:40.117417Z", + "shell.execute_reply": "2024-08-29T21:13:40.113166Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-03-11 10:35:00 -0.337715\n", + "2013-03-11 10:36:00 -2.028548\n", + "2013-03-11 10:37:00 0.726182\n", + "2013-03-11 10:38:00 -1.167831\n", + "2013-03-11 10:39:00 -1.285208\n", + "\n", + "[100000 rows x 1 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft.loc[\"2013\"]" + ] + }, + { + "cell_type": "markdown", + "id": "510776d8-1c0f-4c19-a8eb-bf3f7eb6d939", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This starts on the very first time in the month, and includes the last date and time for the month:" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "73ad4bae-beb3-4933-8a23-b66471bd2128", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:40.125450Z", + "iopub.status.busy": "2024-08-29T21:13:40.125049Z", + "iopub.status.idle": "2024-08-29T21:13:48.494065Z", + "shell.execute_reply": "2024-08-29T21:13:48.493171Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-28 23:55:00 -3.284701\n", + "2013-02-28 23:56:00 0.475275\n", + "2013-02-28 23:57:00 0.501877\n", + "2013-02-28 23:58:00 0.222138\n", + "2013-02-28 23:59:00 0.717464\n", + "\n", + "[84960 rows x 1 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1\":\"2013-2\"]" + ] + }, + { + "cell_type": "markdown", + "id": "d9dbd8b5-97a1-4458-9efe-69abc1000805", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This specifies a stop time that includes all of the times on the last day:" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "7541bba4-f71d-49d5-8901-c7882ceb04f0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:48.499398Z", + "iopub.status.busy": "2024-08-29T21:13:48.499040Z", + "iopub.status.idle": "2024-08-29T21:13:57.151139Z", + "shell.execute_reply": "2024-08-29T21:13:57.144730Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-28 23:55:00 -3.284701\n", + "2013-02-28 23:56:00 0.475275\n", + "2013-02-28 23:57:00 0.501877\n", + "2013-02-28 23:58:00 0.222138\n", + "2013-02-28 23:59:00 0.717464\n", + "\n", + "[84960 rows x 1 columns]" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1\":\"2013-2-28\"]" + ] + }, + { + "cell_type": "markdown", + "id": "2b90d0fd-8479-4741-b015-d8e8219c6e2c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This specifies an exact stop time (and is not the same as the above):" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "04deeb28-36e8-4c35-904a-02d71a9abe7c", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:57.155897Z", + "iopub.status.busy": "2024-08-29T21:13:57.155648Z", + "iopub.status.idle": "2024-08-29T21:14:05.751918Z", + "shell.execute_reply": "2024-08-29T21:14:05.750961Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-01 00:00:001.764052
2013-01-01 00:01:000.400157
2013-01-01 00:02:000.978738
2013-01-01 00:03:002.240893
2013-01-01 00:04:001.867558
......
2013-02-27 23:56:00-0.036098
2013-02-27 23:57:00-1.679458
2013-02-27 23:58:000.443969
2013-02-27 23:59:001.390478
2013-02-28 00:00:000.569440
\n", + "

83521 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-27 23:56:00 -0.036098\n", + "2013-02-27 23:57:00 -1.679458\n", + "2013-02-27 23:58:00 0.443969\n", + "2013-02-27 23:59:00 1.390478\n", + "2013-02-28 00:00:00 0.569440\n", + "\n", + "[83521 rows x 1 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1\":\"2013-2-28 00:00:00\"]" + ] + }, + { + "cell_type": "markdown", + "id": "f17d1737-ded7-4b19-8e20-718c80872edb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### We are stopping on the included end-point as it is part of the index:" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "445ed641-74d9-4026-ad94-3172565e7db2", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:05.761408Z", + "iopub.status.busy": "2024-08-29T21:14:05.761002Z", + "iopub.status.idle": "2024-08-29T21:14:14.045516Z", + "shell.execute_reply": "2024-08-29T21:14:14.044532Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-15 00:00:00-1.195459
2013-01-15 00:01:001.543360
2013-01-15 00:02:000.237914
2013-01-15 00:03:000.767214
2013-01-15 00:04:00-2.109814
......
2013-01-15 12:26:000.817564
2013-01-15 12:27:00-0.649760
2013-01-15 12:28:001.245159
2013-01-15 12:29:000.300473
2013-01-15 12:30:001.167551
\n", + "

751 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-15 00:00:00 -1.195459\n", + "2013-01-15 00:01:00 1.543360\n", + "2013-01-15 00:02:00 0.237914\n", + "2013-01-15 00:03:00 0.767214\n", + "2013-01-15 00:04:00 -2.109814\n", + "... ...\n", + "2013-01-15 12:26:00 0.817564\n", + "2013-01-15 12:27:00 -0.649760\n", + "2013-01-15 12:28:00 1.245159\n", + "2013-01-15 12:29:00 0.300473\n", + "2013-01-15 12:30:00 1.167551\n", + "\n", + "[751 rows x 1 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1-15\":\"2013-1-15 12:30:00\"]" + ] + }, + { + "cell_type": "markdown", + "id": "d87d800b-29fb-4b93-a28f-0de738a3ae07", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### DatetimeIndex partial string indexing also works on a DataFrame with a MultiIndex:" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "e8c3388b-374a-44a8-a060-b5531589bbf6", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:14.053801Z", + "iopub.status.busy": "2024-08-29T21:14:14.053416Z", + "iopub.status.idle": "2024-08-29T21:14:16.356105Z", + "shell.execute_reply": "2024-08-29T21:14:16.354167Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-01 00:00:00a-0.483797
b1.288057
2013-01-01 12:00:00a-0.129879
b-0.198078
2013-01-02 00:00:00a-0.334488
b-0.391443
2013-01-02 12:00:00a-0.612406
b-0.676524
2013-01-03 00:00:00a1.327230
b-0.448695
2013-01-03 12:00:00a-0.316407
b0.030831
2013-01-04 00:00:00a-0.313357
b-0.173259
2013-01-04 12:00:00a-0.327369
b0.944368
2013-01-05 00:00:00a1.122017
b0.112339
2013-01-05 12:00:00a1.372340
b2.062562
\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 a -0.483797\n", + " b 1.288057\n", + "2013-01-01 12:00:00 a -0.129879\n", + " b -0.198078\n", + "2013-01-02 00:00:00 a -0.334488\n", + " b -0.391443\n", + "2013-01-02 12:00:00 a -0.612406\n", + " b -0.676524\n", + "2013-01-03 00:00:00 a 1.327230\n", + " b -0.448695\n", + "2013-01-03 12:00:00 a -0.316407\n", + " b 0.030831\n", + "2013-01-04 00:00:00 a -0.313357\n", + " b -0.173259\n", + "2013-01-04 12:00:00 a -0.327369\n", + " b 0.944368\n", + "2013-01-05 00:00:00 a 1.122017\n", + " b 0.112339\n", + "2013-01-05 12:00:00 a 1.372340\n", + " b 2.062562" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft2 = pd.DataFrame(\n", + " np.random.randn(20, 1),\n", + " columns=[\"A\"],\n", + " index=pd.MultiIndex.from_product(\n", + " [pd.date_range(\"20130101\", periods=10, freq=\"12h\"), [\"a\", \"b\"]]\n", + " ),\n", + ")\n", + "\n", + "\n", + "dft2" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "cf34669d-75ef-4baf-89e0-c00b284fb23d", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:16.364311Z", + "iopub.status.busy": "2024-08-29T21:14:16.363818Z", + "iopub.status.idle": "2024-08-29T21:14:17.890076Z", + "shell.execute_reply": "2024-08-29T21:14:17.889537Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-05 00:00:00a1.122017
b0.112339
2013-01-05 12:00:00a1.372340
b2.062562
\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-05 00:00:00 a 1.122017\n", + " b 0.112339\n", + "2013-01-05 12:00:00 a 1.372340\n", + " b 2.062562" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft2.loc[\"2013-01-05\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "4fe86ab1-49df-4661-a653-b744cf985345", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:17.893683Z", + "iopub.status.busy": "2024-08-29T21:14:17.893463Z", + "iopub.status.idle": "2024-08-29T21:14:17.942532Z", + "shell.execute_reply": "2024-08-29T21:14:17.942111Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DataFrame.swaplevel", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[73], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m idx \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mIndexSlice\n\u001b[0;32m----> 3\u001b[0m dft2 \u001b[38;5;241m=\u001b[39m \u001b[43mdft2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mswaplevel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msort_index()\n\u001b[1;32m 5\u001b[0m dft2\u001b[38;5;241m.\u001b[39mloc[idx[:, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2013-01-05\u001b[39m\u001b[38;5;124m\"\u001b[39m], :]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DataFrame.swaplevel" + ] + } + ], + "source": [ + "idx = pd.IndexSlice\n", + "\n", + "dft2 = dft2.swaplevel(0, 1).sort_index()\n", + "\n", + "dft2.loc[idx[:, \"2013-01-05\"], :]" + ] + }, + { + "cell_type": "markdown", + "id": "0596aa8e-1aa3-4854-91f0-2f512fd2d443", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Slicing with string indexing also honors UTC offset." + ] + }, + { + "cell_type": "markdown", + "id": "ad1362d9-364e-443a-b1fe-0029840d7d61", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Slice vs. exact match" + ] + }, + { + "cell_type": "markdown", + "id": "80b2fc81-d35b-4ed3-80af-baa52d5c288e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match.\n", + "\n", + "Consider a Series object with a minute resolution index:" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "cfaa9748-de47-4992-9b34-14da93db7b14", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:17.948629Z", + "iopub.status.busy": "2024-08-29T21:14:17.948432Z", + "iopub.status.idle": "2024-08-29T21:14:18.805828Z", + "shell.execute_reply": "2024-08-29T21:14:18.805536Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Index.resolution is not yet implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:250\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DatetimeIndex' object has no attribute 'resolution'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[74], line 9\u001b[0m\n\u001b[1;32m 1\u001b[0m series_minute \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mSeries(\n\u001b[1;32m 2\u001b[0m [\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m],\n\u001b[1;32m 3\u001b[0m 
pd\u001b[38;5;241m.\u001b[39mDatetimeIndex(\n\u001b[1;32m 4\u001b[0m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2011-12-31 23:59:00\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01-01 00:00:00\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01-01 00:02:00\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 5\u001b[0m ),\n\u001b[1;32m 6\u001b[0m )\n\u001b[0;32m----> 9\u001b[0m \u001b[43mseries_minute\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresolution\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:257\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 253\u001b[0m native_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_NATIVE_INDEX_TYPE([])\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(native_index, key):\n\u001b[1;32m 255\u001b[0m \u001b[38;5;66;03m# Any methods that not supported by the current Index.py but exist in a\u001b[39;00m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;66;03m# native pandas index object should raise a not implemented error for now.\u001b[39;00m\n\u001b[0;32m--> 257\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 258\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mIndex.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mkey\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not yet implemented\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 259\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Index.resolution is not yet implemented" + ] + } + ], + "source": [ + "series_minute = pd.Series(\n", + " [1, 2, 3],\n", + " pd.DatetimeIndex(\n", + " [\"2011-12-31 23:59:00\", \"2012-01-01 00:00:00\", \"2012-01-01 00:02:00\"]\n", + " ),\n", + ")\n", + "\n", + "\n", + "series_minute.index.resolution" + ] + }, + { + "cell_type": "markdown", + "id": "424987b9-88eb-4e81-b9b4-e4e188fabda6", + "metadata": { + "editable": true, + 
"slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### A timestamp string less accurate than a minute gives a Series object." + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "b8997514-bb79-4da7-bcbb-e0262d04412f", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:18.807978Z", + "iopub.status.busy": "2024-08-29T21:14:18.807841Z", + "iopub.status.idle": "2024-08-29T21:14:19.436725Z", + "shell.execute_reply": "2024-08-29T21:14:19.436157Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-12-31 23:59:00 1\n", + "Freq: None, dtype: int64" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series_minute[\"2011-12-31 23\"] # we return series instead" + ] + }, + { + "cell_type": "markdown", + "id": "971505bf-eed4-4fdc-a764-4572b25b353b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Exact indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "1a24da8a-4736-4120-bb69-5380b6b59f53", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:19.439721Z", + "iopub.status.busy": "2024-08-29T21:14:19.439536Z", + "iopub.status.idle": "2024-08-29T21:14:28.000648Z", + "shell.execute_reply": "2024-08-29T21:14:27.999577Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-01 00:00:001.764052
2013-01-01 00:01:000.400157
2013-01-01 00:02:000.978738
2013-01-01 00:03:002.240893
2013-01-01 00:04:001.867558
......
2013-02-27 23:56:00-0.036098
2013-02-27 23:57:00-1.679458
2013-02-27 23:58:000.443969
2013-02-27 23:59:001.390478
2013-02-28 00:00:000.569440
\n", + "

83521 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-27 23:56:00 -0.036098\n", + "2013-02-27 23:57:00 -1.679458\n", + "2013-02-27 23:58:00 0.443969\n", + "2013-02-27 23:59:00 1.390478\n", + "2013-02-28 00:00:00 0.569440\n", + "\n", + "[83521 rows x 1 columns]" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[datetime.datetime(2013, 1, 1): datetime.datetime(2013, 2, 28)]" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "d62bee69-00d5-4fed-bf7c-5f6aee22eade", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:28.008282Z", + "iopub.status.busy": "2024-08-29T21:14:28.007924Z", + "iopub.status.idle": "2024-08-29T21:14:36.438112Z", + "shell.execute_reply": "2024-08-29T21:14:36.434977Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-01 10:12:000.605120
2013-01-01 10:13:000.895556
2013-01-01 10:14:00-0.131909
2013-01-01 10:15:000.404762
2013-01-01 10:16:000.223844
......
2013-02-28 10:08:000.746108
2013-02-28 10:09:001.754498
2013-02-28 10:10:00-0.622373
2013-02-28 10:11:00-0.449793
2013-02-28 10:12:000.848123
\n", + "

83521 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 10:12:00 0.605120\n", + "2013-01-01 10:13:00 0.895556\n", + "2013-01-01 10:14:00 -0.131909\n", + "2013-01-01 10:15:00 0.404762\n", + "2013-01-01 10:16:00 0.223844\n", + "... ...\n", + "2013-02-28 10:08:00 0.746108\n", + "2013-02-28 10:09:00 1.754498\n", + "2013-02-28 10:10:00 -0.622373\n", + "2013-02-28 10:11:00 -0.449793\n", + "2013-02-28 10:12:00 0.848123\n", + "\n", + "[83521 rows x 1 columns]" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\n", + " datetime.datetime(2013, 1, 1, 10, 12, 0): datetime.datetime(\n", + " 2013, 2, 28, 10, 12, 0\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "0231045c-df35-4892-9069-8819d41a8d55", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Truncating & fancy indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "62e2459c-ab53-4f7c-aea7-a623717d4f36", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:36.443746Z", + "iopub.status.busy": "2024-08-29T21:14:36.443343Z", + "iopub.status.idle": "2024-08-29T21:14:38.169327Z", + "shell.execute_reply": "2024-08-29T21:14:38.168887Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method Series.truncate", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[78], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m rng2 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2011-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mW\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m ts2 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mSeries(np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;28mlen\u001b[39m(rng2)), index\u001b[38;5;241m=\u001b[39mrng2)\n\u001b[0;32m----> 5\u001b[0m \u001b[43mts2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtruncate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbefore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2011-11\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mafter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2011-12\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): 
\u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in 
https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method Series.truncate" + ] + } + ], + "source": [ + "rng2 = pd.date_range(\"2011-01-01\", \"2012-01-01\", freq=\"W\")\n", + "\n", + "ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)\n", + "\n", + "ts2.truncate(before=\"2011-11\", after=\"2011-12\")" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "6d0d2c8b-205f-4a57-b7bd-301c7531b1ad", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:38.175988Z", + "iopub.status.busy": "2024-08-29T21:14:38.175829Z", + "iopub.status.idle": "2024-08-29T21:14:38.947568Z", + "shell.execute_reply": "2024-08-29T21:14:38.946986Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-11-06 -0.433416\n", + "2011-11-13 0.773872\n", + "2011-11-20 -0.834212\n", + "2011-11-27 -0.728240\n", + "2011-12-04 0.674975\n", + "2011-12-11 -0.477772\n", + "2011-12-18 1.492301\n", + "2011-12-25 -0.658391\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts2[\"2011-11\":\"2011-12\"]" + ] + }, + { + "cell_type": "markdown", + "id": "362a58c6-68a8-4103-96dd-569752c6d199", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Time/date components" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "0a5647ae-a168-4c3e-98a5-381cdd2c61c2", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:38.952582Z", + "iopub.status.busy": "2024-08-29T21:14:38.952189Z", + "iopub.status.idle": "2024-08-29T21:14:39.580562Z", + "shell.execute_reply": "2024-08-29T21:14:39.580158Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Index.isocalendar is not yet implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:250\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m 
err:\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DatetimeIndex' object has no attribute 'isocalendar'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[80], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m idx \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2019-12-29\u001b[39m\u001b[38;5;124m\"\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43midx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misocalendar\u001b[49m()\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:257\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 253\u001b[0m native_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_NATIVE_INDEX_TYPE([])\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(native_index, key):\n\u001b[1;32m 255\u001b[0m \u001b[38;5;66;03m# Any methods that not supported by the current Index.py but exist in a\u001b[39;00m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;66;03m# native pandas index object should raise a not implemented error for now.\u001b[39;00m\n\u001b[0;32m--> 257\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 258\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mIndex.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mkey\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not yet implemented\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 259\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Index.isocalendar is not yet implemented" + ] + } + ], + "source": [ + "idx = pd.date_range(start=\"2019-12-29\", freq=\"D\", periods=4)\n", + "\n", + "idx.isocalendar()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 81, + "id": "0c48589b-bcf8-4d43-a1d6-d2df60ff0e0f", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:39.583089Z", + "iopub.status.busy": "2024-08-29T21:14:39.582784Z", + "iopub.status.idle": "2024-08-29T21:14:41.713938Z", + "shell.execute_reply": "2024-08-29T21:14:41.713060Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearweekday
2019-12-292019527
2019-12-30202011
2019-12-31202012
2020-01-01202013
\n", + "
" + ], + "text/plain": [ + " year week day\n", + "2019-12-29 2019 52 7\n", + "2019-12-30 2020 1 1\n", + "2019-12-31 2020 1 2\n", + "2020-01-01 2020 1 3" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx.to_series().dt.isocalendar()" + ] + }, + { + "cell_type": "markdown", + "id": "9b5b10c8-e72f-4405-bdeb-1bced14c8edf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# DateOffset objects" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "1febbd6a-1b57-4e6a-a48a-3eac565ad61d", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:41.721027Z", + "iopub.status.busy": "2024-08-29T21:14:41.720702Z", + "iopub.status.idle": "2024-08-29T21:14:41.729443Z", + "shell.execute_reply": "2024-08-29T21:14:41.728809Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts = pd.Timestamp(\"2016-10-30 00:00:00\", tz=\"Europe/Helsinki\")\n", + "\n", + "ts + pd.Timedelta(days=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "b8d03fb0-826f-4698-a6d0-f2b63f7d38dc", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:41.733948Z", + "iopub.status.busy": "2024-08-29T21:14:41.733522Z", + "iopub.status.idle": "2024-08-29T21:14:41.737568Z", + "shell.execute_reply": "2024-08-29T21:14:41.737119Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts + pd.DateOffset(days=1)" + ] + }, + { + "cell_type": "markdown", + "id": "7d4c2685-79b1-4262-9c53-e0901b730d36", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Time Series-related instance methods" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "7544f420-459f-4320-89f4-a9897ad4daf8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:41.742020Z", + "iopub.status.busy": "2024-08-29T21:14:41.741770Z", + "iopub.status.idle": "2024-08-29T21:14:43.045122Z", + "shell.execute_reply": "2024-08-29T21:14:43.043759Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-01-31 NaN\n", + "2011-02-28 0.0\n", + "2011-03-31 1.0\n", + "2011-04-30 2.0\n", + "2011-05-31 3.0\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts = pd.Series(range(len(rng)), index=rng)\n", + "\n", + "ts = ts[:5]\n", + "\n", + "ts.shift(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "ee59ce08-9bd7-4fc2-93e1-55868cb47028", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:43.050774Z", + "iopub.status.busy": "2024-08-29T21:14:43.050375Z", + "iopub.status.idle": "2024-08-29T21:14:43.284463Z", + "shell.execute_reply": "2024-08-29T21:14:43.284107Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "shifting 
index values not yet supported.",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "NotImplementedError                       Traceback (most recent call last)",
+      "Cell In[85], line 1",
+      "----> 1 ts.shift(5, freq=\"D\")",
+      "File .../snowflake/snowpark/modin/pandas/series.py:2551, in Series.shift(self, periods, freq, axis, fill_value, suffix)",
+      "-> 2551 return super().shift(periods, freq, axis, fill_value, suffix)",
+      "File .../snowflake/snowpark/modin/pandas/base.py:3125, in BasePandasDataset.shift(self, periods, freq, axis, fill_value, suffix)",
+      "-> 3125 new_query_compiler = self._query_compiler.shift(",
+      "File .../snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1672, in SnowflakeQueryCompiler.shift(self, periods, freq, axis, fill_value, suffix)",
+      "-> 1672 return self._shift_index(periods, freq)",
+      "File .../snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1636, in SnowflakeQueryCompiler._shift_index(self, periods, freq)",
+      "-> 1636 ErrorMessage.not_implemented(\"shifting index values not yet supported.\")",
+      "File .../snowflake/snowpark/modin/plugin/utils/error_message.py:163, in ErrorMessage.not_implemented(cls, message)",
+      "--> 163 raise NotImplementedError(message)",
+      "NotImplementedError: shifting index values not yet supported."
+     ]
+    }
+   ],
+   "source": [
+    "ts.shift(5, freq=\"D\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 86,
+   "id": "3548edbe-b2c0-4d7b-b111-b90931af0306",
+   "metadata": {
+    "editable": true,
+    "execution": {
+     "iopub.execute_input": "2024-08-29T21:14:43.306275Z",
+     "iopub.status.busy": "2024-08-29T21:14:43.306123Z",
+     "iopub.status.idle": "2024-08-29T21:14:43.358939Z",
+     "shell.execute_reply": "2024-08-29T21:14:43.358609Z"
+    },
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "raises-exception"
+    ]
+   },
+   "outputs": [
+    {
+     "ename": "NotImplementedError",
+     "evalue": "shifting index values not yet supported.",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "NotImplementedError                       Traceback (most recent call last)",
+      "Cell In[86], line 1",
+      "----> 1 ts.shift(5, freq=pd.offsets.BDay())",
+      "(... frames identical to the In[85] traceback above ...)",
+      "File .../snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1636, in SnowflakeQueryCompiler._shift_index(self, periods, freq)",
+      "-> 1636 ErrorMessage.not_implemented(\"shifting index values not yet supported.\")",
+      "NotImplementedError: shifting index values not yet supported."
+     ]
+    }
+   ],
+   "source": [
+    "ts.shift(5, freq=pd.offsets.BDay())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "id": "a980c67a-1717-425b-b74f-a277cf73f576",
+   "metadata": {
+    "editable": true,
+    "execution": {
+     "iopub.execute_input": "2024-08-29T21:14:43.380989Z",
+     "iopub.status.busy": "2024-08-29T21:14:43.380843Z",
+     "iopub.status.idle": "2024-08-29T21:14:43.436653Z",
+     "shell.execute_reply": "2024-08-29T21:14:43.436299Z"
+    },
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "raises-exception"
+    ]
+   },
+   "outputs": [
+    {
+     "ename": "NotImplementedError",
+     "evalue": "shifting index values not yet supported.",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "NotImplementedError                       Traceback (most recent call last)",
+      "Cell In[87], line 1",
+      "----> 1 ts.shift(5, freq=\"BME\")",
+      "(... frames identical to the In[85] traceback above ...)",
+      "File .../snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1636, in SnowflakeQueryCompiler._shift_index(self, periods, freq)",
+      "-> 1636 ErrorMessage.not_implemented(\"shifting index values not yet supported.\")",
+      "NotImplementedError: shifting index values not yet supported."
+     ]
+    }
+   ],
+   "source": [
+    "ts.shift(5, freq=\"BME\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00d1f5cf-c073-4c60-949b-404953b80000",
+   "metadata": {
+    "editable": true,
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": []
+   },
+   "source": [
+    "## Frequency conversion"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 88,
+   "id": "0a0b12b0-3ed9-476e-b98c-c1d04162627b",
+   "metadata": {
+    "editable": true,
+    "execution": {
+     "iopub.execute_input": "2024-08-29T21:14:43.455819Z",
+     "iopub.status.busy": "2024-08-29T21:14:43.455677Z",
+     "iopub.status.idle": "2024-08-29T21:14:43.498018Z",
+     "shell.execute_reply": "2024-08-29T21:14:43.497707Z"
+    },
+    "slideshow": {
+     "slide_type": ""
+    },
+    "tags": [
+     "raises-exception"
+    ]
+   },
+   "outputs": [
+    {
+     "ename": "NotImplementedError",
+     "evalue": "offset B is not implemented in Snowpark pandas API",
+     "output_type": "error",
+     "traceback": [
+      "---------------------------------------------------------------------------",
+      "NotImplementedError                       Traceback (most recent call last)",
+      "Cell In[88], line 1",
+      "----> 1 dr = pd.date_range(\"1/1/2010\", periods=3, freq=3 * pd.offsets.BDay())",
+      "      3 ts = pd.Series(np.random.randn(3), index=dr)",
+      "      5 ts.asfreq(pd.offsets.BDay())",
pd\u001b[38;5;241m.\u001b[39mSeries(np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;241m3\u001b[39m), index\u001b[38;5;241m=\u001b[39mdr)\n\u001b[1;32m 5\u001b[0m ts\u001b[38;5;241m.\u001b[39masfreq(pd\u001b[38;5;241m.\u001b[39moffsets\u001b[38;5;241m.\u001b[39mBDay())\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/general.py:2405\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 2401\u001b[0m \u001b[38;5;66;03m# If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints.\u001b[39;00m\n\u001b[1;32m 2402\u001b[0m \u001b[38;5;66;03m# If more than one of these inputs provides a timezone, require that they all agree.\u001b[39;00m\n\u001b[1;32m 2403\u001b[0m tz \u001b[38;5;241m=\u001b[39m _infer_tz_from_endpoints(start, end, tz)\n\u001b[0;32m-> 2405\u001b[0m qc \u001b[38;5;241m=\u001b[39m \u001b[43mSnowflakeQueryCompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_date_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2406\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2407\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2408\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2409\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2410\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2411\u001b[0m \u001b[43m \u001b[49m\u001b[43mleft_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2412\u001b[0m \u001b[43m \u001b[49m\u001b[43mright_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2413\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m \u001b[38;5;66;03m# Set date range as index column.\u001b[39;00m\n\u001b[1;32m 2415\u001b[0m qc \u001b[38;5;241m=\u001b[39m qc\u001b[38;5;241m.\u001b[39mset_index_from_columns(qc\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mtolist(), include_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:691\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_date_range\u001b[0;34m(cls, start, end, periods, freq, tz, left_inclusive, right_inclusive)\u001b[0m\n\u001b[1;32m 689\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m ns_values\u001b[38;5;241m.\u001b[39mseries_to_datetime()\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 691\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_irregular_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mfreq\u001b[49m\n\u001b[1;32m 693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 694\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 695\u001b[0m \u001b[38;5;66;03m# Create a linearly spaced date_range in local time\u001b[39;00m\n\u001b[1;32m 696\u001b[0m \u001b[38;5;66;03m# This is the original pandas source code:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 701\u001b[0m \u001b[38;5;66;03m# Here we implement it similarly as np.linspace\u001b[39;00m\n\u001b[1;32m 702\u001b[0m div \u001b[38;5;241m=\u001b[39m periods \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;66;03m# type: ignore[operator]\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:216\u001b[0m, in \u001b[0;36mgenerate_irregular_range\u001b[0;34m(start, end, periods, offset)\u001b[0m\n\u001b[1;32m 213\u001b[0m periods \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 215\u001b[0m num_offsets \u001b[38;5;241m=\u001b[39m get_active_session()\u001b[38;5;241m.\u001b[39mrange(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, end\u001b[38;5;241m=\u001b[39mperiods, step\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 216\u001b[0m sf_date_or_time_part \u001b[38;5;241m=\u001b[39m \u001b[43m_offset_name_to_sf_date_or_time_part\u001b[49m\u001b[43m(\u001b[49m\u001b[43moffset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m dt_col \u001b[38;5;241m=\u001b[39m builtin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDATEADD\u001b[39m\u001b[38;5;124m\"\u001b[39m)(\n\u001b[1;32m 218\u001b[0m sf_date_or_time_part,\n\u001b[1;32m 219\u001b[0m offset\u001b[38;5;241m.\u001b[39mn \u001b[38;5;241m*\u001b[39m col(num_offsets\u001b[38;5;241m.\u001b[39mcolumns[\u001b[38;5;241m0\u001b[39m]),\n\u001b[1;32m 220\u001b[0m pandas_lit(start),\n\u001b[1;32m 221\u001b[0m )\n\u001b[1;32m 222\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m offset\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01min\u001b[39;00m LAST_DAY:\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# When last day is required, we need to explicitly call LAST_DAY SQL function to convert DATEADD results to the\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;66;03m# last day, e.g., adding one month to \"2/29/2024\" using DATEADD results \"3/29/2024\", which is not the last day\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# of March. So we need to call LAST_DAY. 
Also, LAST_DAY only return the date, then we need to reconstruct the\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# timestamp using timestamp_ntz_from_parts\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:162\u001b[0m, in \u001b[0;36m_offset_name_to_sf_date_or_time_part\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP:\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP[name]\n\u001b[0;32m--> 162\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moffset \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not implemented in Snowpark pandas API\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: offset B is not implemented in Snowpark pandas API" + ] + } + ], + "source": [ + "dr = pd.date_range(\"1/1/2010\", periods=3, freq=3 * pd.offsets.BDay())\n", + "\n", + "ts = pd.Series(np.random.randn(3), index=dr)\n", + "\n", + "ts.asfreq(pd.offsets.BDay())" + ] + }, + { + "cell_type": "markdown", + "id": "0bf19b9b-5cf1-4d1e-958b-a056a366464e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Converting between representations" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "a0e7fb20-dd4a-456a-8557-aeea61ac0c3b", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:43.513174Z", + "iopub.status.busy": "2024-08-29T21:14:43.513031Z", + "iopub.status.idle": "2024-08-29T21:14:45.394562Z", + "shell.execute_reply": "2024-08-29T21:14:45.393409Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2012-01-31 -0.513578\n", + "2012-02-29 -0.471256\n", + "2012-03-31 1.608285\n", + "2012-04-30 
-1.384413\n", + "2012-05-31 2.278423\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rng = pd.date_range(\"1/1/2012\", periods=5, freq=\"ME\")\n", + "\n", + "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", + "\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "96b177da-d407-4297-bcc5-d7f2a8b4ee12", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.400225Z", + "iopub.status.busy": "2024-08-29T21:14:45.399854Z", + "iopub.status.idle": "2024-08-29T21:14:45.449274Z", + "shell.execute_reply": "2024-08-29T21:14:45.448910Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method Series.to_period", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[90], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m ps \u001b[38;5;241m=\u001b[39m \u001b[43mts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_period\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m ps\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method Series.to_period" + ] + } + ], + "source": [ + "ps = ts.to_period()\n", + "\n", + "ps" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "781d74b4-a7fd-4f58-a8af-fa21598502b0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.455603Z", + "iopub.status.busy": "2024-08-29T21:14:45.455404Z", + "iopub.status.idle": "2024-08-29T21:14:45.464053Z", + "shell.execute_reply": "2024-08-29T21:14:45.463774Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'ps' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[91], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mps\u001b[49m\u001b[38;5;241m.\u001b[39mto_timestamp()\n", + "\u001b[0;31mNameError\u001b[0m: name 'ps' is not defined" + ] + } + ], + "source": [ + "ps.to_timestamp()" + ] + }, + { + "cell_type": "markdown", + "id": "bf270f9c-86e0-4161-a187-7b0dbb2e47b6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### Timestamp Binary Operations" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "dd818a8d-97c1-46f3-b29f-499ba92f22ae", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.465935Z", + "iopub.status.busy": "2024-08-29T21:14:45.465805Z", + "iopub.status.idle": "2024-08-29T21:14:45.468720Z", + "shell.execute_reply": "2024-08-29T21:14:45.468476Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('396 days 03:00:00')" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime('2018-10-26 12:00:00') - pd.to_datetime('2017-09-25 09:00:00')" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "7c9a87d2-7883-46a6-8433-dfa5900ca9b0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.470366Z", + "iopub.status.busy": "2024-08-29T21:14:45.470260Z", + "iopub.status.idle": "2024-08-29T21:14:45.472362Z", + "shell.execute_reply": "2024-08-29T21:14:45.472118Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('6 days 07:00:00')" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Timestamp(\"2014-08-01 10:00\") - pd.Timestamp(\"2014-07-26 03:00\")" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "e78454b1-0d4c-42bc-a127-b21a4a7f09cf", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.474079Z", + "iopub.status.busy": "2024-08-29T21:14:45.473971Z", + "iopub.status.idle": "2024-08-29T21:14:45.476309Z", + "shell.execute_reply": "2024-08-29T21:14:45.476045Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('682 days 03:00:00')" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Timestamp(year=2017, month=1, day=1, hour=12) - pd.Timestamp(year=2015, month=2, day=19, hour=9)" + ] + }, + { + "cell_type": "code", + "execution_count": 
95,
+   "id": "2534d141-1862-4901-ba70-7ed73ab9abdd",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2024-08-29T21:14:45.478042Z",
+     "iopub.status.busy": "2024-08-29T21:14:45.477931Z",
+     "iopub.status.idle": "2024-08-29T21:14:45.480738Z",
+     "shell.execute_reply": "2024-08-29T21:14:45.480456Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Timedelta('-31 days +03:09:02')"
+      ]
+     },
+     "execution_count": 95,
      "metadata": {},
      "output_type": "execute_result"
     }
diff --git a/tests/unit/modin/modin/test_envvars.py b/tests/unit/modin/modin/test_envvars.py
index 4f3540a63bf..7c5e3a40bb0 100644
--- a/tests/unit/modin/modin/test_envvars.py
+++ b/tests/unit/modin/modin/test_envvars.py
@@ -90,6 +90,32 @@ def test_custom_help(make_custom_envvar):
     assert "custom var" in make_custom_envvar.get_help()
 
 
+def _init_doc_module():
+    # Put the docs_module on the path
+    sys.path.append(f"{os.path.dirname(__file__)}")
+    # We use base.py from upstream modin, so we need to initialize its doc module
+    # However, since using the environment variable causes an importlib.reload call,
+    # we need to manually call _inherit_docstrings (https://github.com/modin-project/modin/issues/7138)
+    from .docs_module import classes
+
+    # As a workaround for upstream modin bugs, we use our own _inherit_docstrings instead of the upstream
+    # function. We accordingly need to clear the docstring dictionary in testing because
+    # we manually called the annotation on initializing snowflake.snowpark.modin.pandas.
+    # snowflake.snowpark.modin.utils._attributes_with_docstrings_replaced.clear()
+    # TODO: once modin 0.31.0 is available, use the actual modin DocModule class
+    snowflake.snowpark.modin.utils._inherit_docstrings(
+        classes.BasePandasDataset,
+        overwrite_existing=True,
+    )(pd.base.BasePandasDataset)
+    DocModule.put("docs_module")
+
+
+DOC_OVERRIDE_XFAIL_REASON = (
+    "test docstring overrides currently cannot override real docstring overrides until "
+    "modin 0.31.0 is available"
+)
+
+
 class TestDocModule:
     """
     Test using a module to replace default docstrings.
@@ -99,11 +125,9 @@ class TestDocModule:
     which we need to fix in upstream modin.
     """
 
+    @pytest.mark.xfail(strict=True, reason=DOC_OVERRIDE_XFAIL_REASON)
     def test_overrides(self):
-        # Put the docs_module on the path
-        sys.path.append(f"{os.path.dirname(__file__)}")
-        DocModule.put("docs_module")
-
+        _init_doc_module()
         # Test for override
         # TODO(https://github.com/modin-project/modin/issues/7134): Upstream
         # the BasePandasDataset tests to modin.
@@ -144,11 +168,7 @@ def test_overrides(self):
 
     def test_not_redefining_classes_modin_issue_7138(self):
         original_dataframe_class = pd.DataFrame
-
-        # Put the docs_module on the path
-        sys.path.append(f"{os.path.dirname(__file__)}")
-        DocModule.put("docs_module")
-
+        _init_doc_module()
         # Test for override
         assert (
             pd.DataFrame.apply.__doc__
@@ -157,22 +177,20 @@ def test_not_redefining_classes_modin_issue_7138(self):
 
         assert pd.DataFrame is original_dataframe_class
 
+    @pytest.mark.xfail(strict=True, reason=DOC_OVERRIDE_XFAIL_REASON)
     def test_base_docstring_override_with_no_dataframe_or_series_class_modin_issue_7113(
         self,
     ):
         # TODO(https://github.com/modin-project/modin/issues/7113): Upstream
         # this test case to Modin. This test case tests scenario 1 from issue 7113.
-        sys.path.append(f"{os.path.dirname(__file__)}")
-        DocModule.put("docs_module_with_just_base")
+        _init_doc_module()
 
         assert pd.base.BasePandasDataset.astype.__doc__ == (
             "This is a test of the documentation module for BasePandasDataSet.astype."
         )
 
+    @pytest.mark.xfail(strict=True, reason=DOC_OVERRIDE_XFAIL_REASON)
     def test_base_property_not_overridden_in_either_subclass_modin_issue_7113(self):
-        # Put the docs_module on the path
-        sys.path.append(f"{os.path.dirname(__file__)}")
-        DocModule.put("docs_module")
-
+        _init_doc_module()
         assert (
             pd.base.BasePandasDataset.loc.__doc__
             == "This is a test of the documentation module for BasePandasDataset.loc."
diff --git a/tests/unit/modin/test_join_utils.py b/tests/unit/modin/test_join_utils.py
new file mode 100644
index 00000000000..031ab13bef9
--- /dev/null
+++ b/tests/unit/modin/test_join_utils.py
@@ -0,0 +1,97 @@
+#
+# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved.
+#
+
+from collections.abc import Hashable
+from unittest import mock
+
+import pytest
+
+import snowflake.snowpark.modin.plugin  # noqa: F401
+from snowflake.snowpark.modin.plugin._internal.frame import (
+    OrderedDataFrame,
+    OrderingColumn,
+)
+from snowflake.snowpark.modin.plugin._internal.join_utils import (
+    InheritJoinIndex,
+    JoinKeyCoalesceConfig,
+    _create_internal_frame_with_join_or_align_result,
+)
+from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import (
+    InternalFrame,
+)
+
+
+def mock_internal_frame(
+    data_column_pandas_labels: list[Hashable],
+    data_column_pandas_index_names: list[Hashable],
+    data_column_snowflake_quoted_identifiers: list[str],
+    index_column_pandas_labels: list[Hashable],
+    index_column_snowflake_quoted_identifiers: list[str],
+) -> InternalFrame:
+    ordered_dataframe = mock.create_autospec(OrderedDataFrame)
+    ordered_dataframe.projected_column_snowflake_quoted_identifiers = (
+        data_column_snowflake_quoted_identifiers
+        + index_column_snowflake_quoted_identifiers
+    )
+    ordered_dataframe.ordering_columns = [
+        OrderingColumn(col)
+        for col in ordered_dataframe.projected_column_snowflake_quoted_identifiers
+    ]
+    internal_frame = InternalFrame.create(
+        ordered_dataframe=ordered_dataframe,
+        data_column_pandas_labels=data_column_pandas_labels,
+        data_column_pandas_index_names=data_column_pandas_index_names,
+        data_column_snowflake_quoted_identifiers=data_column_snowflake_quoted_identifiers,
+        index_column_pandas_labels=index_column_pandas_labels,
+        index_column_snowflake_quoted_identifiers=index_column_snowflake_quoted_identifiers,
+        data_column_types=[None] * len(data_column_pandas_labels),
+        index_column_types=[None] * len(index_column_pandas_labels),
+    )
+
+    return internal_frame
+
+
+def test_create_internal_frame_with_result_using_invalid_methods():
+    left_frame = mock_internal_frame(
+        data_column_pandas_labels=["a1", "b1"],
+        data_column_pandas_index_names=[None],
+        data_column_snowflake_quoted_identifiers=['"A1"', '"B1"'],
+        index_column_pandas_labels=["i1"],
+        index_column_snowflake_quoted_identifiers=['"I1"'],
+    )
+
+    right_frame = mock_internal_frame(
+        data_column_pandas_labels=["a2", "b2"],
+        data_column_pandas_index_names=[None],
+        data_column_snowflake_quoted_identifiers=['"A2"', '"B2"'],
+        index_column_pandas_labels=["i2"],
+        index_column_snowflake_quoted_identifiers=['"I2"'],
+    )
+
+    result_ordered_frame = mock.create_autospec(OrderedDataFrame)
+    result_ordered_frame.projected_column_snowflake_quoted_identifiers = [
+        '"I1"',
+        '"A1"',
+        '"B1"',
+        '"I2"',
+        '"A2"',
+        '"B2"',
+    ]
+    result_ordered_frame._ordering_columns_tuple = [
+        OrderingColumn('"I1"'),
+        OrderingColumn('"I2"'),
+    ]
+
+    with pytest.raises(AssertionError, match="Unsupported join/align type invalid"):
+        _create_internal_frame_with_join_or_align_result(
+            result_ordered_frame=result_ordered_frame,
+            left=left_frame,
+            right=right_frame,
+            how="invalid",
+            left_on=['"I1"'],
+            right_on=['"I2"'],
+            sort=False,
+            key_coalesce_config=[JoinKeyCoalesceConfig.LEFT],
+            inherit_index=InheritJoinIndex.FROM_LEFT,
+        )
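
Note on the notebook cells in this change: `shift` with `freq`, `asfreq` with business-day offsets, and `Series.to_period`/`to_timestamp` fail only because Snowpark pandas has not implemented them yet; plain `Timestamp` arithmetic already matches native pandas. A minimal fallback sketch, assuming an active Snowflake session and a result set small enough to pull client-side (the variable names and the round-trip pattern are ours for illustration, not part of this change):

    import numpy as np
    import modin.pandas as pd
    import snowflake.snowpark.modin.plugin  # noqa: F401 -- registers the Snowpark pandas backend

    rng = pd.date_range("1/1/2012", periods=5, freq="ME")
    ts = pd.Series(np.random.randn(len(rng)), index=rng)

    # to_pandas() executes the pending query and returns a native pandas Series,
    # so the not-yet-implemented conversions can run locally.
    ps = ts.to_pandas().to_period()
    ts_again = ps.to_timestamp()

The trade-off is that the data leaves Snowflake and all pushdown is lost, so this workaround is only sensible for results that fit in client memory.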