diff --git a/CHANGELOG.md b/CHANGELOG.md index d1197179e1c..b24bfc59466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -76,6 +76,7 @@ - support for lazy `TimedeltaIndex`. - support for `pd.to_timedelta`. - support for `GroupBy` aggregations `min`, `max`, `mean`, `idxmax`, `idxmin`, `std`, `sum`, `median`, `count`, `any`, `all`, `size`, `nunique`. + - support for `TimedeltaIndex` attributes: `days`, `seconds`, `microseconds` and `nanoseconds`. - Added support for index's arithmetic and comparison operators. - Added support for `Series.dt.round`. - Added documentation pages for `DatetimeIndex`. @@ -89,16 +90,29 @@ - Added support for `Index.is_boolean`, `Index.is_integer`, `Index.is_floating`, `Index.is_numeric`, and `Index.is_object`. - Added support for `DatetimeIndex.round`, `DatetimeIndex.floor` and `DatetimeIndex.ceil`. - Added support for `Series.dt.days_in_month` and `Series.dt.daysinmonth`. +- Added support for `DataFrameGroupBy.value_counts` and `SeriesGroupBy.value_counts`. +- Added support for `Series.is_monotonic_increasing` and `Series.is_monotonic_decreasing`. +- Added support for `Index.is_monotonic_increasing` and `Index.is_monotonic_decreasing`. +- Added support for `pd.crosstab`. #### Improvements - Refactored `quoted_identifier_to_snowflake_type` to avoid making metadata queries if the types have been cached locally. +- Improved `pd.to_datetime` to handle all local input cases. #### Bug Fixes - Stopped ignoring nanoseconds in `pd.Timedelta` scalars. - Fixed AssertionError in tree of binary operations. +#### Behavior Change + +- When calling `DataFrame.set_index`, or setting `DataFrame.index` or `Series.index`, with a new index that does not match the current length of the `Series`/`DataFrame` object, a `ValueError` is no longer raised. When the `Series`/`DataFrame` object is longer than the new index, the `Series`/`DataFrame`'s new index is filled with `NaN` values for the "extra" elements. When the `Series`/`DataFrame` object is shorter than the new index, the extra values in the new index are ignored; `Series` and `DataFrame` stay the same length `n` and use only the first `n` values of the new index. + +#### Improvements + +- Improved `concat` and `join` performance when operations are performed on `Series` coming from the same `DataFrame` by avoiding unnecessary joins. + ## 1.21.0 (2024-08-19) ### Snowpark Python API Updates diff --git a/docs/source/modin/general_functions.rst b/docs/source/modin/general_functions.rst index 803a901ac15..858bc54003e 100644 --- a/docs/source/modin/general_functions.rst +++ b/docs/source/modin/general_functions.rst @@ -11,6 +11,7 @@ General functions :toctree: pandas_api/ melt + crosstab pivot pivot_table cut diff --git a/docs/source/modin/groupby.rst b/docs/source/modin/groupby.rst index 97c99ce383d..e27a3bcf547 100644 --- a/docs/source/modin/groupby.rst +++ b/docs/source/modin/groupby.rst @@ -59,6 +59,7 @@ GroupBy DataFrameGroupBy.std DataFrameGroupBy.sum DataFrameGroupBy.tail + DataFrameGroupBy.value_counts DataFrameGroupBy.var ..
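For illustration (not part of the patch itself): a minimal sketch of the `set_index` length-mismatch behavior described in the "Behavior Change" entry above. It assumes a configured Snowpark pandas session; the expected index values follow the changelog wording rather than verified output:

    import modin.pandas as pd
    import snowflake.snowpark.modin.plugin  # noqa: F401  (registers the Snowpark pandas backend)

    # Object longer than the new index: previously a ValueError, now the "extra"
    # element gets a NaN index label, so the index becomes [1, 2, NaN].
    s = pd.Series([10, 20, 30])
    s.index = pd.Index([1, 2])

    # Object shorter than the new index: the extra index values are ignored and
    # only the first len(t) labels are used, so the index becomes [1, 2].
    t = pd.Series([10, 20])
    t.index = pd.Index([1, 2, 3])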
rubric:: `SeriesGroupBy` computations / descriptive stats @@ -90,4 +91,5 @@ GroupBy SeriesGroupBy.std SeriesGroupBy.sum SeriesGroupBy.tail + SeriesGroupBy.value_counts SeriesGroupBy.var diff --git a/docs/source/modin/series.rst b/docs/source/modin/series.rst index 507d6663f32..fbd936db2f9 100644 --- a/docs/source/modin/series.rst +++ b/docs/source/modin/series.rst @@ -26,6 +26,8 @@ Series Series.equals Series.empty Series.hasnans + Series.is_monotonic_increasing + Series.is_monotonic_decreasing Series.name Series.ndim Series.shape diff --git a/docs/source/modin/supported/general_supported.rst b/docs/source/modin/supported/general_supported.rst index b055ed9dc6d..a12951d00f6 100644 --- a/docs/source/modin/supported/general_supported.rst +++ b/docs/source/modin/supported/general_supported.rst @@ -18,7 +18,10 @@ Data manipulations | ``concat`` | P | ``levels`` is not supported, | | | | | ``copy`` is ignored | | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ -| ``crosstab`` | N | | | +| ``crosstab`` | P | | ``N`` if ``aggfunc`` is not one of | +| | | | "count", "mean", "min", "max", or "sum", or | +| | | | margins is True, normalize is "all" or True, | +| | | | and values is passed. | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ | ``cut`` | P | ``retbins``, ``labels`` | ``N`` if ``retbins=True``or ``labels!=False`` | +-----------------------------+---------------------------------+----------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/groupby_supported.rst b/docs/source/modin/supported/groupby_supported.rst index f9ef001af29..3bcf3538216 100644 --- a/docs/source/modin/supported/groupby_supported.rst +++ b/docs/source/modin/supported/groupby_supported.rst @@ -166,7 +166,7 @@ Computations/descriptive stats +-----------------------------+---------------------------------+----------------------------------------------------+ | ``take`` | N | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``value_counts`` | N | | +| ``value_counts`` | P | ``N`` if ``bins`` is given for SeriesGroupBy | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``var`` | P | See ``std`` | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/index_supported.rst b/docs/source/modin/supported/index_supported.rst index 9db80686454..0c413c201fb 100644 --- a/docs/source/modin/supported/index_supported.rst +++ b/docs/source/modin/supported/index_supported.rst @@ -20,9 +20,9 @@ Attributes +-----------------------------+---------------------------------+----------------------------------------------------+ | ``values`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_increasing`` | N | | +| ``is_monotonic_increasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_decreasing`` | N | | +| ``is_monotonic_decreasing`` | Y | | 
+-----------------------------+---------------------------------+----------------------------------------------------+ | ``is_unique`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/series_supported.rst b/docs/source/modin/supported/series_supported.rst index 331be4d0298..618b88d5034 100644 --- a/docs/source/modin/supported/series_supported.rst +++ b/docs/source/modin/supported/series_supported.rst @@ -43,9 +43,9 @@ Attributes +-----------------------------+---------------------------------+----------------------------------------------------+ | ``index`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_decreasing`` | N | | +| ``is_monotonic_decreasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``is_monotonic_increasing`` | N | | +| ``is_monotonic_increasing`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``is_unique`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/docs/source/modin/supported/timedelta_index_supported.rst b/docs/source/modin/supported/timedelta_index_supported.rst index 73abe530fd7..cd5e64b8c98 100644 --- a/docs/source/modin/supported/timedelta_index_supported.rst +++ b/docs/source/modin/supported/timedelta_index_supported.rst @@ -15,13 +15,13 @@ Attributes +-----------------------------+---------------------------------+----------------------------------------------------+ | TimedeltaIndex attribute | Snowpark implemented? 
(Y/N/P/D) | Notes for current implementation | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``days`` | N | | +| ``days`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``seconds`` | N | | +| ``seconds`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``microseconds`` | N | | +| ``microseconds`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ -| ``nanoseconds`` | N | | +| ``nanoseconds`` | Y | | +-----------------------------+---------------------------------+----------------------------------------------------+ | ``components`` | N | | +-----------------------------+---------------------------------+----------------------------------------------------+ diff --git a/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py b/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py index 01b25889076..86ffaf533ac 100644 --- a/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py +++ b/src/snowflake/snowpark/_internal/analyzer/analyzer_utils.py @@ -878,6 +878,9 @@ def create_table_as_select_statement( change_tracking: Optional[bool] = None, copy_grants: bool = False, iceberg_config: Optional[dict] = None, + *, + use_scoped_temp_objects: bool = False, + is_generated: bool = False, ) -> str: column_definition_sql = ( f"{LEFT_PARENTHESIS}{column_definition}{RIGHT_PARENTHESIS}" @@ -911,7 +914,8 @@ def create_table_as_select_statement( ) options_statement = get_options_statement(options) return ( - f"{CREATE}{OR + REPLACE if replace else EMPTY_STRING} {table_type.upper()} " + f"{CREATE}{OR + REPLACE if replace else EMPTY_STRING}" + f" {(get_temp_type_for_object(use_scoped_temp_objects, is_generated) if table_type.lower() in TEMPORARY_STRING_SET else table_type).upper()} " f"{ICEBERG if iceberg_config is not None else EMPTY_STRING}{TABLE}" f"{IF + NOT + EXISTS if not replace and not error else EMPTY_STRING} " f"{table_name}{column_definition_sql}{cluster_by_clause}{options_statement}" diff --git a/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py b/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py index 3600357f647..efb643289bb 100644 --- a/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py +++ b/src/snowflake/snowpark/_internal/analyzer/snowflake_plan.py @@ -938,6 +938,8 @@ def get_create_table_as_select_plan(child: SnowflakePlan, replace, error): change_tracking=change_tracking, copy_grants=copy_grants, iceberg_config=iceberg_config, + use_scoped_temp_objects=use_scoped_temp_objects, + is_generated=is_generated, ), child, source_plan, diff --git a/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py b/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py index 34d27862ced..5707d71dc33 100644 --- a/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py +++ b/src/snowflake/snowpark/_internal/compiler/large_query_breakdown.py @@ -6,8 +6,6 @@ from collections import defaultdict from typing import List, Optional, Tuple -from sortedcontainers import SortedList - from snowflake.snowpark._internal.analyzer.analyzer_utils import ( drop_table_if_exists_statement, ) @@ -201,11 +199,11 @@ def _find_node_to_breakdown(self, root: TreeNode) -> Optional[TreeNode]: 1. 
Traverse the plan tree and find the valid nodes for partitioning. 2. If no valid node is found, return None. - 3. Keep valid nodes in a sorted list based on the complexity score. - 4. Return the node with the highest complexity score. + 3. Return the node with the highest complexity score. """ current_level = [root] - pipeline_breaker_list = SortedList(key=lambda x: x[0]) + candidate_node = None + candidate_score = -1 # start with -1 since score is always > 0 while current_level: next_level = [] @@ -215,23 +213,20 @@ def _find_node_to_breakdown(self, root: TreeNode) -> Optional[TreeNode]: self._parent_map[child].add(node) valid_to_breakdown, score = self._is_node_valid_to_breakdown(child) if valid_to_breakdown: - # Append score and child to the pipeline breaker sorted list - # so that the valid child with the highest complexity score - # is at the end of the list. - pipeline_breaker_list.add((score, child)) + # If the score for valid node is higher than the last candidate, + # update the candidate node and score. + if score > candidate_score: + candidate_score = score + candidate_node = child else: # don't traverse subtrees if parent is a valid candidate next_level.append(child) current_level = next_level - if not pipeline_breaker_list: - # Return None if no valid node is found for partitioning. - return None - - # Get the node with the highest complexity score - _, child = pipeline_breaker_list.pop() - return child + # If no valid node is found, candidate_node will be None. + # Otherwise, return the node with the highest complexity score. + return candidate_node def _get_partitioned_plan(self, root: TreeNode, child: TreeNode) -> SnowflakePlan: """This method takes cuts the child out from the root, creates a temp table plan for the diff --git a/src/snowflake/snowpark/modin/pandas/__init__.py b/src/snowflake/snowpark/modin/pandas/__init__.py index c4eb07d9589..b51a47b64b3 100644 --- a/src/snowflake/snowpark/modin/pandas/__init__.py +++ b/src/snowflake/snowpark/modin/pandas/__init__.py @@ -85,10 +85,16 @@ timedelta_range, ) +import modin.pandas + # TODO: SNOW-851745 make sure add all Snowpark pandas API general functions from modin.pandas import plotting # type: ignore[import] -from snowflake.snowpark.modin.pandas.dataframe import DataFrame +from snowflake.snowpark.modin.pandas.api.extensions import ( + register_dataframe_accessor, + register_series_accessor, +) +from snowflake.snowpark.modin.pandas.dataframe import _DATAFRAME_EXTENSIONS_, DataFrame from snowflake.snowpark.modin.pandas.general import ( concat, crosstab, @@ -140,15 +146,15 @@ read_xml, to_pickle, ) -from snowflake.snowpark.modin.pandas.series import Series +from snowflake.snowpark.modin.pandas.series import _SERIES_EXTENSIONS_, Series from snowflake.snowpark.modin.plugin._internal.session import SnowpandasSessionHolder +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + try_add_telemetry_to_attribute, +) # The extensions assigned to this module _PD_EXTENSIONS_: dict = {} -# base needs to be re-exported in order to properly override docstrings for BasePandasDataset -# moving this import higher prevents sphinx from building documentation (??) 
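For illustration (not part of the patch): the `_find_node_to_breakdown` change above replaces the `SortedList` of candidates with a running maximum, dropping the `sortedcontainers` dependency and avoiding an O(n log n) insert-and-pop. A standalone sketch of that selection pattern, using hypothetical names:

    from typing import Iterable, Optional, Tuple

    def pick_highest_scoring(candidates: Iterable[Tuple[int, str]]) -> Optional[str]:
        # Track the best candidate in a single pass instead of keeping a sorted
        # container and popping the largest element at the end.
        best_node: Optional[str] = None
        best_score = -1  # scores are assumed positive, mirroring the patched code
        for score, node in candidates:
            if score > best_score:
                best_score = score
                best_node = node
        return best_node

    # pick_highest_scoring([(3, "a"), (7, "b"), (5, "c")]) returns "b";
    # pick_highest_scoring([]) returns None, matching the "no valid node" case.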
-from snowflake.snowpark.modin.pandas import base # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.pd_extensions as pd_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.pd_overrides # isort: skip # noqa: E402,F401 @@ -157,12 +163,71 @@ DatetimeIndex, TimedeltaIndex, ) + +# this must occur before overrides are applied +_attrs_defined_on_modin_base = set(dir(modin.pandas.base.BasePandasDataset)) +_attrs_defined_on_series = set( + dir(Series) +) # TODO: SNOW-1063347 revisit when series.py is removed +_attrs_defined_on_dataframe = set( + dir(DataFrame) +) # TODO: SNOW-1063346 revisit when dataframe.py is removed + +# base overrides occur before subclass overrides in case subclasses override a base method +import snowflake.snowpark.modin.plugin.extensions.base_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.base_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.dataframe_overrides # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.series_extensions # isort: skip # noqa: E402,F401 import snowflake.snowpark.modin.plugin.extensions.series_overrides # isort: skip # noqa: E402,F401 +# For any method defined on Series/DF, add telemetry to it if it meets all of the following conditions: +# 1. The method was defined directly on upstream BasePandasDataset (_attrs_defined_on_modin_base) +# 2. The method is not overridden by a child class (this will change) +# 3. The method is not overridden by an extensions module +# 4. The method name does not start with an _ +# +# TODO: SNOW-1063347 +# Since we still use the vendored version of Series and the overrides for the top-level +# namespace haven't been performed yet, we need to set properties on the vendored version +_base_telemetry_added_attrs = set() + +_series_ext = _SERIES_EXTENSIONS_.copy() +for attr_name in dir(Series): + if ( + attr_name in _attrs_defined_on_modin_base + and attr_name in _attrs_defined_on_series + and attr_name not in _series_ext + and not attr_name.startswith("_") + ): + register_series_accessor(attr_name)( + try_add_telemetry_to_attribute(attr_name, getattr(Series, attr_name)) + ) + _base_telemetry_added_attrs.add(attr_name) + +# TODO: SNOW-1063346 +# Since we still use the vendored version of DataFrame and the overrides for the top-level +# namespace haven't been performed yet, we need to set properties on the vendored version +_dataframe_ext = _DATAFRAME_EXTENSIONS_.copy() +for attr_name in dir(DataFrame): + if ( + attr_name in _attrs_defined_on_modin_base + and attr_name in _attrs_defined_on_dataframe + and attr_name not in _dataframe_ext + and not attr_name.startswith("_") + ): + # If telemetry was already added via Series, register the override but don't re-wrap + # the method in the telemetry annotation. If we don't do this check, we will end up + # double-reporting telemetry on some methods. 
+ original_attr = getattr(DataFrame, attr_name) + new_attr = ( + original_attr + if attr_name in _base_telemetry_added_attrs + else try_add_telemetry_to_attribute(attr_name, original_attr) + ) + register_dataframe_accessor(attr_name)(new_attr) + _base_telemetry_added_attrs.add(attr_name) + def __getattr__(name: str) -> Any: """ @@ -220,7 +285,6 @@ def __getattr__(name: str) -> Any: "date_range", "Index", "MultiIndex", - "Series", "bdate_range", "period_range", "DatetimeIndex", @@ -318,8 +382,7 @@ def __getattr__(name: str) -> Any: # Manually re-export the members of the pd_extensions namespace, which are not declared in __all__. _EXTENSION_ATTRS = ["read_snowflake", "to_snowflake", "to_snowpark", "to_pandas"] # We also need to re-export native_pd.offsets, since modin.pandas doesn't re-export it. -# snowflake.snowpark.pandas.base also needs to be re-exported to make docstring overrides for BasePandasDataset work. -_ADDITIONAL_ATTRS = ["offsets", "base"] +_ADDITIONAL_ATTRS = ["offsets"] # This code should eventually be moved into the `snowflake.snowpark.modin.plugin` module instead. # Currently, trying to do so would result in incorrect results because `snowflake.snowpark.modin.pandas` diff --git a/src/snowflake/snowpark/modin/pandas/base.py b/src/snowflake/snowpark/modin/pandas/base.py deleted file mode 100644 index c08cdee1386..00000000000 --- a/src/snowflake/snowpark/modin/pandas/base.py +++ /dev/null @@ -1,4217 +0,0 @@ -# -# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. -# - -# Licensed to Modin Development Team under one or more contributor license agreements. -# See the NOTICE file distributed with this work for additional information regarding -# copyright ownership. The Modin Development Team licenses this file to you under the -# Apache License, Version 2.0 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under -# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF -# ANY KIND, either express or implied. See the License for the specific language -# governing permissions and limitations under the License. - -# Code in this file may constitute partial or total reimplementation, or modification of -# existing code originally distributed by the Modin project, under the Apache License, -# Version 2.0. 
- -"""Implement DataFrame/Series public API as pandas does.""" -from __future__ import annotations - -import pickle as pkl -import re -import warnings -from collections.abc import Hashable, Mapping, Sequence -from typing import Any, Callable, Literal, get_args - -import numpy as np -import numpy.typing as npt -import pandas -import pandas.core.generic -import pandas.core.resample -import pandas.core.window.rolling -from pandas._libs import lib -from pandas._libs.lib import NoDefault, is_bool, no_default -from pandas._typing import ( - AggFuncType, - AnyArrayLike, - Axes, - Axis, - CompressionOptions, - DtypeBackend, - FillnaOptions, - IgnoreRaise, - IndexKeyFunc, - IndexLabel, - Level, - NaPosition, - RandomState, - Scalar, - StorageOptions, - TimedeltaConvertibleTypes, - TimestampConvertibleTypes, -) -from pandas.compat import numpy as numpy_compat -from pandas.core.common import apply_if_callable, count_not_none, pipe -from pandas.core.dtypes.common import ( - is_dict_like, - is_dtype_equal, - is_list_like, - is_numeric_dtype, - is_object_dtype, - pandas_dtype, -) -from pandas.core.dtypes.inference import is_integer -from pandas.errors import SpecificationError -from pandas.util._validators import ( - validate_ascending, - validate_bool_kwarg, - validate_percentile, -) - -from snowflake.snowpark.modin import pandas as pd -from snowflake.snowpark.modin.pandas.utils import ( - ensure_index, - extract_validate_and_try_convert_named_aggs_from_kwargs, - get_as_shape_compatible_dataframe_or_series, - is_scalar, - raise_if_native_pandas_objects, - validate_and_try_convert_agg_func_arg_func_to_str, -) -from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta -from snowflake.snowpark.modin.plugin._typing import ListLike -from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage -from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage -from snowflake.snowpark.modin.utils import ( - _inherit_docstrings, - try_cast_to_pandas, - validate_int_kwarg, -) - -# Similar to pandas, sentinel value to use as kwarg in place of None when None has -# special meaning and needs to be distinguished from a user explicitly passing None. 
-sentinel = object() - -# Do not look up certain attributes in columns or index, as they're used for some -# special purposes, like serving remote context -_ATTRS_NO_LOOKUP = { - "____id_pack__", - "__name__", - "_cache", - "_ipython_canary_method_should_not_exist_", - "_ipython_display_", - "_repr_html_", - "_repr_javascript_", - "_repr_jpeg_", - "_repr_json_", - "_repr_latex_", - "_repr_markdown_", - "_repr_mimebundle_", - "_repr_pdf_", - "_repr_png_", - "_repr_svg_", - "__array_struct__", - "__array_interface__", - "_typ", -} - -_DEFAULT_BEHAVIOUR = { - "__init__", - "__class__", - "_get_index", - "_set_index", - "_pandas_class", - "_get_axis_number", - "empty", - "index", - "columns", - "name", - "dtypes", - "dtype", - "groupby", - "_get_name", - "_set_name", - "_default_to_pandas", - "_query_compiler", - "_to_pandas", - "_repartition", - "_build_repr_df", - "_reduce_dimension", - "__repr__", - "__len__", - "__constructor__", - "_create_or_update_from_compiler", - "_update_inplace", - # for persistance support; - # see DataFrame methods docstrings for more - "_inflate_light", - "_inflate_full", - "__reduce__", - "__reduce_ex__", - "_init", -} | _ATTRS_NO_LOOKUP - - -@_inherit_docstrings( - pandas.DataFrame, - apilink=["pandas.DataFrame", "pandas.Series"], - excluded=[ - pandas.DataFrame.between_time, - pandas.Series.between_time, - pandas.DataFrame.flags, - pandas.Series.flags, - pandas.DataFrame.kurt, - pandas.Series.kurt, - pandas.DataFrame.kurtosis, - pandas.Series.kurtosis, - pandas.DataFrame.rank, - pandas.Series.rank, - pandas.DataFrame.to_csv, - pandas.Series.to_csv, - pandas.DataFrame.sum, - ], -) -class BasePandasDataset(metaclass=TelemetryMeta): - """ - Implement most of the common code that exists in DataFrame/Series. - - Since both objects share the same underlying representation, and the algorithms - are the same, we use this object to define the general behavior of those objects - and then use those objects to define the output type. - - TelemetryMeta is a metaclass that automatically add telemetry decorators to classes/instance methods. - See TelemetryMeta for details. Note: Its subclasses will inherit this metaclass. - """ - - # pandas class that we pretend to be; usually it has the same name as our class - # but lives in "pandas" namespace. - _pandas_class = pandas.core.generic.NDFrame - - @pandas.util.cache_readonly - def _is_dataframe(self) -> bool: - """ - Tell whether this is a dataframe. - - Ideally, other methods of BasePandasDataset shouldn't care whether this - is a dataframe or a series, but sometimes we need to know. This method - is better than hasattr(self, "columns"), which for series will call - self.__getattr__("columns"), which requires materializing the index. - - Returns - ------- - bool : Whether this is a dataframe. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return issubclass(self._pandas_class, pandas.DataFrame) - - def _add_sibling(self, sibling): - """ - Add a DataFrame or Series object to the list of siblings. - - Siblings are objects that share the same query compiler. This function is called - when a shallow copy is made. - - Parameters - ---------- - sibling : BasePandasDataset - Dataset to add to siblings list. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - sibling._siblings = self._siblings + [self] - self._siblings += [sibling] - for sib in self._siblings: - sib._siblings += [sibling] - - def _update_inplace(self, new_query_compiler): - """ - Update the current DataFrame inplace. - - Parameters - ---------- - new_query_compiler : query_compiler - The new QueryCompiler to use to manage the data. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - old_query_compiler = self._query_compiler - self._query_compiler = new_query_compiler - for sib in self._siblings: - sib._query_compiler = new_query_compiler - old_query_compiler.free() - - def _validate_other( - self, - other, - axis, - dtype_check=False, - compare_index=False, - ): - """ - Help to check validity of other in inter-df operations. - - Parameters - ---------- - other : modin.pandas.BasePandasDataset - Another dataset to validate against `self`. - axis : {None, 0, 1} - Specifies axis along which to do validation. When `1` or `None` - is specified, validation is done along `index`, if `0` is specified - validation is done along `columns` of `other` frame. - dtype_check : bool, default: False - Validates that both frames have compatible dtypes. - compare_index : bool, default: False - Compare Index if True. - - Returns - ------- - modin.pandas.BasePandasDataset - Other frame if it is determined to be valid. - - Raises - ------ - ValueError - If `other` is `Series` and its length is different from - length of `self` `axis`. - TypeError - If any validation checks fail. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if isinstance(other, BasePandasDataset): - return other._query_compiler - if not is_list_like(other): - # We skip dtype checking if the other is a scalar. Note that pandas - # is_scalar can be misleading as it is False for almost all objects, - # even when those objects should be treated as scalars. See e.g. - # https://github.com/modin-project/modin/issues/5236. Therefore, we - # detect scalars by checking that `other` is neither a list-like nor - # another BasePandasDataset. - return other - axis = self._get_axis_number(axis) if axis is not None else 1 - result = other - if axis == 0: - if len(other) != len(self._query_compiler.index): - raise ValueError( - f"Unable to coerce to Series, length must be {len(self._query_compiler.index)}: " - + f"given {len(other)}" - ) - else: - if len(other) != len(self._query_compiler.columns): - raise ValueError( - f"Unable to coerce to Series, length must be {len(self._query_compiler.columns)}: " - + f"given {len(other)}" - ) - if hasattr(other, "dtype"): - other_dtypes = [other.dtype] * len(other) - elif is_dict_like(other): - other_dtypes = [ - type(other[label]) - for label in self._query_compiler.get_axis(axis) - # The binary operation is applied for intersection of axis labels - # and dictionary keys. So filtering out extra keys. - if label in other - ] - else: - other_dtypes = [type(x) for x in other] - if compare_index: - if not self.index.equals(other.index): - raise TypeError("Cannot perform operation with non-equal index") - # Do dtype checking. - if dtype_check: - self_dtypes = self._get_dtypes() - if is_dict_like(other): - # The binary operation is applied for the intersection of axis labels - # and dictionary keys. So filtering `self_dtypes` to match the `other` - # dictionary. 
- self_dtypes = [ - dtype - for label, dtype in zip( - self._query_compiler.get_axis(axis), self._get_dtypes() - ) - if label in other - ] - - # TODO(https://github.com/modin-project/modin/issues/5239): - # this spuriously rejects other that is a list including some - # custom type that can be added to self's elements. - if not all( - (is_numeric_dtype(self_dtype) and is_numeric_dtype(other_dtype)) - or (is_object_dtype(self_dtype) and is_object_dtype(other_dtype)) - # Check if dtype is timedelta ("m") or datetime ("M") - or ( - lib.is_np_dtype(self_dtype, "mM") - and lib.is_np_dtype(other_dtype, "mM") - ) - or is_dtype_equal(self_dtype, other_dtype) - for self_dtype, other_dtype in zip(self_dtypes, other_dtypes) - ): - raise TypeError("Cannot do operation with improper dtypes") - return result - - def _validate_function(self, func, on_invalid=None): - """ - Check the validity of the function which is intended to be applied to the frame. - - Parameters - ---------- - func : object - on_invalid : callable(str, cls), optional - Function to call in case invalid `func` is met, `on_invalid` takes an error - message and an exception type as arguments. If not specified raise an - appropriate exception. - **Note:** This parameter is a hack to concord with pandas error types. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - - def error_raiser(msg, exception=Exception): - raise exception(msg) - - if on_invalid is None: - on_invalid = error_raiser - - if isinstance(func, dict): - [self._validate_function(fn, on_invalid) for fn in func.values()] - return - # We also could validate this, but it may be quite expensive for lazy-frames - # if not all(idx in self.axes[axis] for idx in func.keys()): - # error_raiser("Invalid dict keys", KeyError) - - if not is_list_like(func): - func = [func] - - for fn in func: - if isinstance(fn, str): - if not (hasattr(self, fn) or hasattr(np, fn)): - on_invalid( - f"{fn} is not valid function for {type(self)} object.", - AttributeError, - ) - elif not callable(fn): - on_invalid( - f"One of the passed functions has an invalid type: {type(fn)}: {fn}, " - + "only callable or string is acceptable.", - TypeError, - ) - - def _binary_op( - self, - op: str, - other: BasePandasDataset, - axis: Axis, - level: Level | None = None, - fill_value: float | None = None, - **kwargs: Any, - ): - """ - Do binary operation between two datasets. - - Parameters - ---------- - op : str - Name of binary operation. - other : modin.pandas.BasePandasDataset - Second operand of binary operation. - axis: Whether to compare by the index (0 or ‘index’) or columns. (1 or ‘columns’). - level: Broadcast across a level, matching Index values on the passed MultiIndex level. - fill_value: Fill existing missing (NaN) values, and any new element needed for - successful DataFrame alignment, with this value before computation. - If data in both corresponding DataFrame locations is missing the result will be missing. - only arithmetic binary operation has this parameter (e.g., add() has, but eq() doesn't have). - - kwargs can contain the following parameters passed in at the frontend: - func: Only used for `combine` method. Function that takes two series as inputs and - return a Series or a scalar. Used to merge the two dataframes column by columns. - - Returns - ------- - modin.pandas.BasePandasDataset - Result of binary operation. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - raise_if_native_pandas_objects(other) - axis = self._get_axis_number(axis) - squeeze_self = isinstance(self, pd.Series) - - # pandas itself will ignore the axis argument when using Series.. - # Per default, it is set to axis=0. However, for the case of a Series interacting with - # a DataFrame the behavior is axis=1. Manually check here for this case and adjust the axis. - - is_lhs_series_and_rhs_dataframe = ( - True - if isinstance(self, pd.Series) and isinstance(other, pd.DataFrame) - else False - ) - - new_query_compiler = self._query_compiler.binary_op( - op=op, - other=other, - axis=1 if is_lhs_series_and_rhs_dataframe else axis, - level=level, - fill_value=fill_value, - squeeze_self=squeeze_self, - **kwargs, - ) - - from snowflake.snowpark.modin.pandas.dataframe import DataFrame - - # Modin Bug: https://github.com/modin-project/modin/issues/7236 - # For a Series interacting with a DataFrame, always return a DataFrame - return ( - DataFrame(query_compiler=new_query_compiler) - if is_lhs_series_and_rhs_dataframe - else self._create_or_update_from_compiler(new_query_compiler) - ) - - def _default_to_pandas(self, op, *args, **kwargs): - """ - Convert dataset to pandas type and call a pandas function on it. - - Parameters - ---------- - op : str - Name of pandas function. - *args : list - Additional positional arguments to be passed to `op`. - **kwargs : dict - Additional keywords arguments to be passed to `op`. - - Returns - ------- - object - Result of operation. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - args = try_cast_to_pandas(args) - kwargs = try_cast_to_pandas(kwargs) - pandas_obj = self._to_pandas() - if callable(op): - result = op(pandas_obj, *args, **kwargs) - elif isinstance(op, str): - # The inner `getattr` is ensuring that we are treating this object (whether - # it is a DataFrame, Series, etc.) as a pandas object. The outer `getattr` - # will get the operation (`op`) from the pandas version of the class and run - # it on the object after we have converted it to pandas. - attr = getattr(self._pandas_class, op) - if isinstance(attr, property): - result = getattr(pandas_obj, op) - else: - result = attr(pandas_obj, *args, **kwargs) - else: - ErrorMessage.internal_error( - failure_condition=True, - extra_log=f"{op} is an unsupported operation", - ) - # SparseDataFrames cannot be serialized by arrow and cause problems for Modin. - # For now we will use pandas. - if isinstance(result, type(self)) and not isinstance( - result, (pandas.SparseDataFrame, pandas.SparseSeries) - ): - return self._create_or_update_from_compiler( - result, inplace=kwargs.get("inplace", False) - ) - elif isinstance(result, pandas.DataFrame): - from snowflake.snowpark.modin.pandas import DataFrame - - return DataFrame(result) - elif isinstance(result, pandas.Series): - from snowflake.snowpark.modin.pandas import Series - - return Series(result) - # inplace - elif result is None: - return self._create_or_update_from_compiler( - getattr(pd, type(pandas_obj).__name__)(pandas_obj)._query_compiler, - inplace=True, - ) - else: - try: - if ( - isinstance(result, (list, tuple)) - and len(result) == 2 - and isinstance(result[0], pandas.DataFrame) - ): - # Some operations split the DataFrame into two (e.g. align). 
We need to wrap - # both of the returned results - if isinstance(result[1], pandas.DataFrame): - second = self.__constructor__(result[1]) - else: - second = result[1] - return self.__constructor__(result[0]), second - else: - return result - except TypeError: - return result - - @classmethod - def _get_axis_number(cls, axis): - """ - Convert axis name or number to axis index. - - Parameters - ---------- - axis : int, str or pandas._libs.lib.NoDefault - Axis name ('index' or 'columns') or number to be converted to axis index. - - Returns - ------- - int - 0 or 1 - axis index in the array of axes stored in the dataframe. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if axis is no_default: - axis = None - - return cls._pandas_class._get_axis_number(axis) if axis is not None else 0 - - @pandas.util.cache_readonly - def __constructor__(self): - """ - Construct DataFrame or Series object depending on self type. - - Returns - ------- - modin.pandas.BasePandasDataset - Constructed object. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return type(self) - - def abs(self): # noqa: RT01, D200 - """ - Return a `BasePandasDataset` with absolute numeric value of each element. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.__constructor__(query_compiler=self._query_compiler.unary_op("abs")) - - def _to_series_list(self, index: pd.Index) -> list[pd.Series]: - """ - Convert index to a list of series - Args: - index: can be single or multi index - - Returns: - the list of series - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if isinstance(index, pd.MultiIndex): - return [ - pd.Series(index.get_level_values(level)) - for level in range(index.nlevels) - ] - elif isinstance(index, pd.Index): - return [pd.Series(index)] - - def _set_index(self, new_index: Axes) -> None: - """ - Set the index for this DataFrame. - - Parameters - ---------- - new_index : pandas.Index - The new index to set this. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - self._update_inplace( - new_query_compiler=self._query_compiler.set_index( - [ - s._query_compiler - for s in self._to_series_list(ensure_index(new_index)) - ] - ) - ) - - def set_axis( - self, - labels: IndexLabel, - *, - axis: Axis = 0, - copy: bool | NoDefault = no_default, - ): - """ - Assign desired index to given axis. - """ - # Behavior based on copy: - # ----------------------------------- - # - In native pandas, copy determines whether to create a copy of the data (not DataFrame). - # - We cannot emulate the native pandas' copy behavior in Snowpark since a copy of only data - # cannot be created -- you can only copy the whole object (DataFrame/Series). - # - # Snowpark behavior: - # ------------------ - # - copy is kept for compatibility with native pandas but is ignored. The user is warned that copy is unused. - # Warn user that copy does not do anything. - if copy is not no_default: - WarningMessage.single_warning( - message=f"{type(self).__name__}.set_axis 'copy' keyword is unused and is ignored." - ) - if labels is None: - raise TypeError("None is not a valid value for the parameter 'labels'.") - - # Determine whether to update self or a copy and perform update. - obj = self.copy() - setattr(obj, axis, labels) - return obj - - def _get_index(self): - """ - Get the index for this DataFrame. 
- - Returns - ------- - pandas.Index - The union of all indexes across the partitions. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - from snowflake.snowpark.modin.plugin.extensions.index import Index - - if self._query_compiler.is_multiindex(): - # Lazy multiindex is not supported - return self._query_compiler.index - - idx = Index(query_compiler=self._query_compiler) - idx._set_parent(self) - return idx - - index = property(_get_index, _set_index) - - def add( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `add`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "add", other, axis=axis, level=level, fill_value=fill_value - ) - - def aggregate( - self, func: AggFuncType = None, axis: Axis | None = 0, *args: Any, **kwargs: Any - ): - """ - Aggregate using one or more operations over the specified axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - from snowflake.snowpark.modin.pandas import Series - - origin_axis = axis - axis = self._get_axis_number(axis) - - if axis == 1 and isinstance(self, Series): - raise ValueError(f"No axis named {origin_axis} for object type Series") - - if len(self._query_compiler.columns) == 0: - # native pandas raise error with message "no result", here we raise a more readable error. - raise ValueError("No column to aggregate on.") - - # If we are using named kwargs, then we do not clear the kwargs (need them in the QC for processing - # order, as well as formatting error messages.) - uses_named_kwargs = False - # If aggregate is called on a Series, named aggregations can be passed in via a dictionary - # to func. - if func is None or (is_dict_like(func) and not self._is_dataframe): - if axis == 1: - raise ValueError( - "`func` must not be `None` when `axis=1`. Named aggregations are not supported with `axis=1`." - ) - if func is not None: - # If named aggregations are passed in via a dictionary to func, then we - # ignore the kwargs. - if any(is_dict_like(value) for value in func.values()): - # We can only get to this codepath if self is a Series, and func is a dictionary. - # In this case, if any of the values of func are themselves dictionaries, we must raise - # a Specification Error, as that is what pandas does. - raise SpecificationError("nested renamer is not supported") - kwargs = func - func = extract_validate_and_try_convert_named_aggs_from_kwargs( - self, allow_duplication=False, axis=axis, **kwargs - ) - uses_named_kwargs = True - else: - func = validate_and_try_convert_agg_func_arg_func_to_str( - agg_func=func, - obj=self, - allow_duplication=False, - axis=axis, - ) - - # This is to stay consistent with pandas result format, when the func is single - # aggregation function in format of callable or str, reduce the result dimension to - # convert dataframe to series, or convert series to scalar. - # Note: When named aggregations are used, the result is not reduced, even if there - # is only a single function. - # needs_reduce_dimension cannot be True if we are using named aggregations, since - # the values for func in that case are either NamedTuples (AggFuncWithLabels) or - # lists of NamedTuples, both of which are list like. 
- need_reduce_dimension = ( - (callable(func) or isinstance(func, str)) - # A Series should be returned when a single scalar string/function aggregation function, or a - # dict of scalar string/functions is specified. In all other cases (including if the function - # is a 1-element list), the result is a DataFrame. - # - # The examples below have axis=1, but the same logic is applied for axis=0. - # >>> df = pd.DataFrame({"a": [0, 1], "b": [2, 3]}) - # - # single aggregation: return Series - # >>> df.agg("max", axis=1) - # 0 2 - # 1 3 - # dtype: int64 - # - # list of aggregations: return DF - # >>> df.agg(["max"], axis=1) - # max - # 0 2 - # 1 3 - # - # dict where all aggregations are strings: return Series - # >>> df.agg({1: "max", 0: "min"}, axis=1) - # 1 3 - # 0 0 - # dtype: int64 - # - # dict where one element is a list: return DF - # >>> df.agg({1: "max", 0: ["min"]}, axis=1) - # max min - # 1 3.0 NaN - # 0 NaN 0.0 - or ( - is_dict_like(func) - and all(not is_list_like(value) for value in func.values()) - ) - ) - - # If func is a dict, pandas will not respect kwargs for each aggregation function, and - # we should drop them before passing the to the query compiler. - # - # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg("max", skipna=False, axis=1) - # 0 NaN - # 1 1.0 - # dtype: float64 - # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg(["max"], skipna=False, axis=1) - # max - # 0 0.0 - # 1 1.0 - # >>> pd.DataFrame([[np.nan], [0]]).aggregate("count", skipna=True, axis=0) - # 0 1 - # dtype: int8 - # >>> pd.DataFrame([[np.nan], [0]]).count(skipna=True, axis=0) - # TypeError: got an unexpected keyword argument 'skipna' - if is_dict_like(func) and not uses_named_kwargs: - kwargs.clear() - - result = self.__constructor__( - query_compiler=self._query_compiler.agg( - func=func, - axis=axis, - args=args, - kwargs=kwargs, - ) - ) - - if need_reduce_dimension: - if self._is_dataframe: - result = Series(query_compiler=result._query_compiler) - - if isinstance(result, Series): - # When func is just "quantile" with a scalar q, result has quantile value as name - q = kwargs.get("q", 0.5) - if func == "quantile" and is_scalar(q): - result.name = q - else: - result.name = None - - # handle case for single scalar (same as result._reduce_dimension()) - if isinstance(self, Series): - return result.to_pandas().squeeze() - - return result - - agg = aggregate - - def _string_function(self, func, *args, **kwargs): - """ - Execute a function identified by its string name. - - Parameters - ---------- - func : str - Function name to call on `self`. - *args : list - Positional arguments to pass to func. - **kwargs : dict - Keyword arguments to pass to func. - - Returns - ------- - object - Function result. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - assert isinstance(func, str) - f = getattr(self, func, None) - if f is not None: - if callable(f): - return f(*args, **kwargs) - assert len(args) == 0 - assert len([kwarg for kwarg in kwargs if kwarg != "axis"]) == 0 - return f - f = getattr(np, func, None) - if f is not None: - return self._default_to_pandas("agg", func, *args, **kwargs) - raise ValueError(f"{func} is an unknown string function") - - def _get_dtypes(self): - """ - Get dtypes as list. - - Returns - ------- - list - Either a one-element list that contains `dtype` if object denotes a Series - or a list that contains `dtypes` if object denotes a DataFrame. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if hasattr(self, "dtype"): - return [self.dtype] - else: - return list(self.dtypes) - - def align( - self, - other, - join="outer", - axis=None, - level=None, - copy=None, - fill_value=None, - method=lib.no_default, - limit=lib.no_default, - fill_axis=lib.no_default, - broadcast_axis=lib.no_default, - ): # pragma: no cover # noqa: PR01, RT01, D200 - """ - Align two objects on their axes with the specified join method. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if ( - method is not lib.no_default - or limit is not lib.no_default - or fill_axis is not lib.no_default - ): - warnings.warn( # noqa: B028 - "The 'method', 'limit', and 'fill_axis' keywords in " - + f"{type(self).__name__}.align are deprecated and will be removed " - + "in a future version. Call fillna directly on the returned objects " - + "instead.", - FutureWarning, - ) - if fill_axis is lib.no_default: - fill_axis = 0 - if method is lib.no_default: - method = None - if limit is lib.no_default: - limit = None - - if broadcast_axis is not lib.no_default: - msg = ( - f"The 'broadcast_axis' keyword in {type(self).__name__}.align is " - + "deprecated and will be removed in a future version." - ) - if broadcast_axis is not None: - if self.ndim == 1 and other.ndim == 2: - msg += ( - " Use left = DataFrame({col: left for col in right.columns}, " - + "index=right.index) before calling `left.align(right)` instead." - ) - elif self.ndim == 2 and other.ndim == 1: - msg += ( - " Use right = DataFrame({col: right for col in left.columns}, " - + "index=left.index) before calling `left.align(right)` instead" - ) - warnings.warn(msg, FutureWarning) # noqa: B028 - else: - broadcast_axis = None - - left, right = self._query_compiler.align( - other._query_compiler, - join=join, - axis=axis, - level=level, - copy=copy, - fill_value=fill_value, - method=method, - limit=limit, - fill_axis=fill_axis, - broadcast_axis=broadcast_axis, - ) - return self.__constructor__(query_compiler=left), self.__constructor__( - query_compiler=right - ) - - def all(self, axis=0, bool_only=None, skipna=True, **kwargs): - """ - Return whether all elements are True, potentially over an axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if axis is not None: - axis = self._get_axis_number(axis) - if bool_only and axis == 0: - if hasattr(self, "dtype"): - ErrorMessage.not_implemented( - "{}.{} does not implement numeric_only.".format( - type(self).__name__, "all" - ) - ) # pragma: no cover - data_for_compute = self[self.columns[self.dtypes == np.bool_]] - return data_for_compute.all( - axis=axis, bool_only=False, skipna=skipna, **kwargs - ) - return self._reduce_dimension( - self._query_compiler.all( - axis=axis, bool_only=bool_only, skipna=skipna, **kwargs - ) - ) - else: - if bool_only: - raise ValueError(f"Axis must be 0 or 1 (got {axis})") - # Reduce to a scalar if axis is None. - result = self._reduce_dimension( - # FIXME: Judging by pandas docs `**kwargs` serves only compatibility - # purpose and does not affect the result, we shouldn't pass them to the query compiler. 
- self._query_compiler.all( - axis=0, - bool_only=bool_only, - skipna=skipna, - **kwargs, - ) - ) - if isinstance(result, BasePandasDataset): - return result.all( - axis=axis, bool_only=bool_only, skipna=skipna, **kwargs - ) - return result - - def any(self, axis=0, bool_only=None, skipna=True, **kwargs): - """ - Return whether any element is True, potentially over an axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if axis is not None: - axis = self._get_axis_number(axis) - if bool_only and axis == 0: - if hasattr(self, "dtype"): - ErrorMessage.not_implemented( - "{}.{} does not implement numeric_only.".format( - type(self).__name__, "all" - ) - ) # pragma: no cover - data_for_compute = self[self.columns[self.dtypes == np.bool_]] - return data_for_compute.any( - axis=axis, bool_only=False, skipna=skipna, **kwargs - ) - return self._reduce_dimension( - self._query_compiler.any( - axis=axis, bool_only=bool_only, skipna=skipna, **kwargs - ) - ) - else: - if bool_only: - raise ValueError(f"Axis must be 0 or 1 (got {axis})") - # Reduce to a scalar if axis is None. - result = self._reduce_dimension( - self._query_compiler.any( - axis=0, - bool_only=bool_only, - skipna=skipna, - **kwargs, - ) - ) - if isinstance(result, BasePandasDataset): - return result.any( - axis=axis, bool_only=bool_only, skipna=skipna, **kwargs - ) - return result - - def apply( - self, - func, - axis, - broadcast, - raw, - reduce, - result_type, - convert_dtype, - args, - **kwds, - ): # noqa: PR01, RT01, D200 - """ - Apply a function along an axis of the `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - - def error_raiser(msg, exception): - """Convert passed exception to the same type as pandas do and raise it.""" - # HACK: to concord with pandas error types by replacing all of the - # TypeErrors to the AssertionErrors - exception = exception if exception is not TypeError else AssertionError - raise exception(msg) - - self._validate_function(func, on_invalid=error_raiser) - axis = self._get_axis_number(axis) - # TODO SNOW-864025: Support str in series.apply and df.apply - if isinstance(func, str): - # if axis != 1 function can be bounded to the Series, which doesn't - # support axis parameter - if axis == 1: - kwds["axis"] = axis - result = self._string_function(func, *args, **kwds) - if isinstance(result, BasePandasDataset): - return result._query_compiler - return result - # TODO SNOW-856682: Support dict in series.apply and df.apply - elif isinstance(func, dict): - if len(self.columns) != len(set(self.columns)): - WarningMessage.mismatch_with_pandas( - operation="apply", - message="Duplicate column names not supported with apply().", - ) # pragma: no cover - query_compiler = self._query_compiler.apply( - func, - axis, - args=args, - raw=raw, - result_type=result_type, - **kwds, - ) - return query_compiler - - def asfreq( - self, - freq: str, - method: FillnaOptions | None = None, - how: str | None = None, - normalize: bool = False, - fill_value: Scalar = None, - ): # noqa: PR01, RT01, D200 - """ - Convert time series to specified frequency. - """ - return self.__constructor__( - query_compiler=self._query_compiler.asfreq( - freq=freq, - method=method, - how=how, - normalize=normalize, - fill_value=fill_value, - ) - ) - - def asof(self, where, subset=None): # noqa: PR01, RT01, D200 - """ - Return the last row(s) without any NaNs before `where`. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - scalar = not is_list_like(where) - if isinstance(where, pandas.Index): - # Prevent accidental mutation of original: - where = where.copy() - else: - if scalar: - where = [where] - where = pandas.Index(where) - - if subset is None: - data = self - else: - # Only relevant for DataFrames: - data = self[subset] - no_na_index = data.dropna().index - new_index = pandas.Index([no_na_index.asof(i) for i in where]) - result = self.reindex(new_index) - result.index = where - - if scalar: - # Need to return a Series: - result = result.squeeze() - return result - - def astype( - self, - dtype: str | type | pd.Series | dict[str, type], - copy: bool = True, - errors: Literal["raise", "ignore"] = "raise", - ) -> pd.DataFrame | pd.Series: - """ - Cast a Modin object to a specified dtype `dtype`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # dtype can be a series, a dict, or a scalar. If it's series or scalar, - # convert it to a dict before passing it to the query compiler. - raise_if_native_pandas_objects(dtype) - from snowflake.snowpark.modin.pandas import Series - - if isinstance(dtype, Series): - dtype = dtype.to_pandas() - if not dtype.index.is_unique: - raise ValueError( - "The new Series of types must have a unique index, i.e. " - + "it must be one-to-one mapping from column names to " - + " their new dtypes." - ) - dtype = dtype.to_dict() - # If we got a series or dict originally, dtype is a dict now. Its keys - # must be column names. - if isinstance(dtype, dict): - # Avoid materializing columns. The query compiler will handle errors where - # dtype dict includes keys that are not in columns. - col_dtypes = dtype - for col_name in col_dtypes: - if col_name not in self._query_compiler.columns: - raise KeyError( - "Only a column name can be used for the key in a dtype mappings argument. " - f"'{col_name}' not found in columns." - ) - else: - # Assume that the dtype is a scalar. - col_dtypes = {column: dtype for column in self._query_compiler.columns} - - # ensure values are pandas dtypes - col_dtypes = {k: pandas_dtype(v) for k, v in col_dtypes.items()} - new_query_compiler = self._query_compiler.astype(col_dtypes, errors=errors) - return self._create_or_update_from_compiler(new_query_compiler, not copy) - - @property - def at(self, axis=None): # noqa: PR01, RT01, D200 - """ - Get a single value for a row/column label pair. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - from .indexing import _AtIndexer - - return _AtIndexer(self) - - def at_time(self, time, asof=False, axis=None): # noqa: PR01, RT01, D200 - """ - Select values at particular time of day (e.g., 9:30AM). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if asof: # pragma: no cover - # pandas raises NotImplementedError for asof=True, so we do, too. - raise NotImplementedError("'asof' argument is not supported") - return self.between_time( - start_time=time, end_time=time, inclusive="both", axis=axis - ) - - def backfill( - self, - axis: Axis | None = None, - inplace: bool = False, - limit: int | None = None, - downcast: dict | None = None, - ): - """ - Synonym for `DataFrame.fillna` with ``method='bfill'``. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - warnings.warn( - "Series/DataFrame.backfill is deprecated. 
Use Series/DataFrame.bfill instead.", - FutureWarning, - stacklevel=1, - ) - return self.fillna( - method="bfill", axis=axis, limit=limit, downcast=downcast, inplace=inplace - ) - - @_inherit_docstrings( - pandas.DataFrame.between_time, apilink="pandas.DataFrame.between_time" - ) - def between_time( - self: BasePandasDataset, - start_time, - end_time, - inclusive: str | None = None, - axis=None, - ): # noqa: PR01, RT01, D200 - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._create_or_update_from_compiler( - self._query_compiler.between_time( - start_time=pandas.core.tools.times.to_time(start_time), - end_time=pandas.core.tools.times.to_time(end_time), - inclusive=inclusive, - axis=self._get_axis_number(axis), - ) - ) - - def bfill( - self, - axis: Axis | None = None, - inplace: bool = False, - limit: int | None = None, - downcast: dict | None = None, - ): - """ - Synonym for `DataFrame.fillna` with ``method='bfill'``. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.fillna( - method="bfill", axis=axis, limit=limit, downcast=downcast, inplace=inplace - ) - - def bool(self): # noqa: RT01, D200 - """ - Return the bool of a single element `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - shape = self.shape - if shape != (1,) and shape != (1, 1): - raise ValueError( - """The PandasObject does not have exactly - 1 element. Return the bool of a single - element PandasObject. The truth value is - ambiguous. Use a.empty, a.item(), a.any() - or a.all().""" - ) - else: - return self._to_pandas().bool() - - def clip( - self, lower=None, upper=None, axis=None, inplace=False, *args, **kwargs - ): # noqa: PR01, RT01, D200 - """ - Trim values at input threshold(s). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # validate inputs - if axis is not None: - axis = self._get_axis_number(axis) - self._validate_dtypes(numeric_only=True) - inplace = validate_bool_kwarg(inplace, "inplace") - axis = numpy_compat.function.validate_clip_with_axis(axis, args, kwargs) - # any np.nan bounds are treated as None - if lower is not None and np.any(np.isnan(lower)): - lower = None - if upper is not None and np.any(np.isnan(upper)): - upper = None - if is_list_like(lower) or is_list_like(upper): - if axis is None: - raise ValueError("Must specify axis = 0 or 1") - lower = self._validate_other(lower, axis) - upper = self._validate_other(upper, axis) - # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility - # purpose and does not affect the result, we shouldn't pass them to the query compiler. - new_query_compiler = self._query_compiler.clip( - lower=lower, upper=upper, axis=axis, inplace=inplace, *args, **kwargs - ) - return self._create_or_update_from_compiler(new_query_compiler, inplace) - - def combine(self, other, func, fill_value=None, **kwargs): # noqa: PR01, RT01, D200 - """ - Perform combination of `BasePandasDataset`-s according to `func`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "combine", other, axis=0, func=func, fill_value=fill_value, **kwargs - ) - - def combine_first(self, other): # noqa: PR01, RT01, D200 - """ - Update null elements with value in the same location in `other`. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("combine_first", other, axis=0) - - def copy(self, deep: bool = True): - """ - Make a copy of the object's metadata. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if deep: - return self.__constructor__(query_compiler=self._query_compiler.copy()) - new_obj = self.__constructor__(query_compiler=self._query_compiler) - self._add_sibling(new_obj) - return new_obj - - def count( - self, - axis: Axis | None = 0, - numeric_only: bool = False, - ): - """ - Count non-NA cells for `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._agg_helper( - func="count", - axis=axis, - numeric_only=numeric_only, - ) - - def cummax(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Return cumulative maximum over a `BasePandasDataset` axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - if axis == 1: - self._validate_dtypes(numeric_only=True) - return self.__constructor__( - # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility - # purpose and does not affect the result, we shouldn't pass them to the query compiler. - query_compiler=self._query_compiler.cummax( - fold_axis=axis, axis=axis, skipna=skipna, **kwargs - ) - ) - - def cummin(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Return cumulative minimum over a `BasePandasDataset` axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - if axis == 1: - self._validate_dtypes(numeric_only=True) - return self.__constructor__( - # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility - # purpose and does not affect the result, we shouldn't pass them to the query compiler. - query_compiler=self._query_compiler.cummin( - fold_axis=axis, axis=axis, skipna=skipna, **kwargs - ) - ) - - def cumprod( - self, axis=None, skipna=True, *args, **kwargs - ): # noqa: PR01, RT01, D200 - """ - Return cumulative product over a `BasePandasDataset` axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - self._validate_dtypes(numeric_only=True) - return self.__constructor__( - # FIXME: Judging by pandas docs `**kwargs` serves only compatibility - # purpose and does not affect the result, we shouldn't pass them to the query compiler. - query_compiler=self._query_compiler.cumprod( - fold_axis=axis, axis=axis, skipna=skipna, **kwargs - ) - ) - - def cumsum(self, axis=None, skipna=True, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Return cumulative sum over a `BasePandasDataset` axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - self._validate_dtypes(numeric_only=True) - return self.__constructor__( - # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility - # purpose and does not affect the result, we shouldn't pass them to the query compiler. 
- query_compiler=self._query_compiler.cumsum( - fold_axis=axis, axis=axis, skipna=skipna, **kwargs - ) - ) - - def describe( - self, - percentiles: ListLike | None = None, - include: ListLike | Literal["all"] | None = None, - exclude: ListLike | None = None, - ) -> BasePandasDataset: - """ - Generate descriptive statistics. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # Upstream modin uses pandas.core.methods.describe._refine_percentiles for this, - # which is not available in pandas 1.5.X - if percentiles is not None: - # explicit conversion of `percentiles` to list - percentiles = list(percentiles) - - # get them all to be in [0, 1] - validate_percentile(percentiles) - - # median should always be included - if 0.5 not in percentiles: - percentiles.append(0.5) - percentiles = np.asarray(percentiles) - else: - percentiles = np.array([0.25, 0.5, 0.75]) - - data = self - if self._is_dataframe: - # Upstream modin lacks this check because it defaults to pandas for describing empty dataframes - if len(self.columns) == 0: - raise ValueError("Cannot describe a DataFrame without columns") - - # include/exclude are ignored for Series - if (include is None) and (exclude is None): - # when some numerics are found, keep only numerics - default_include: list[npt.DTypeLike] = [np.number] - default_include.append("datetime") - data = self.select_dtypes(include=default_include) - if len(data.columns) == 0: - data = self - elif include == "all": - if exclude is not None: - raise ValueError("exclude must be None when include is 'all'") - data = self - else: - data = self.select_dtypes( - include=include, - exclude=exclude, - ) - # Upstream modin uses data.empty, but that incurs an extra row count query - if self._is_dataframe and len(data.columns) == 0: - # Match pandas error from concatenating empty list of series descriptions. - raise ValueError("No objects to concatenate") - - return self.__constructor__( - query_compiler=data._query_compiler.describe(percentiles=percentiles) - ) - - def diff(self, periods: int = 1, axis: Axis = 0): - """ - First discrete difference of element. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # We must only accept integer (or float values that are whole numbers) - # for periods. - int_periods = validate_int_kwarg(periods, "periods", float_allowed=True) - axis = self._get_axis_number(axis) - return self.__constructor__( - query_compiler=self._query_compiler.diff(axis=axis, periods=int_periods) - ) - - def drop( - self, - labels: IndexLabel = None, - axis: Axis = 0, - index: IndexLabel = None, - columns: IndexLabel = None, - level: Level = None, - inplace: bool = False, - errors: IgnoreRaise = "raise", - ) -> BasePandasDataset | None: - """ - Drop specified labels from `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - inplace = validate_bool_kwarg(inplace, "inplace") - if labels is not None: - if index is not None or columns is not None: - raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") - axes = {self._get_axis_number(axis): labels} - elif index is not None or columns is not None: - axes = {0: index, 1: columns} - else: - raise ValueError( - "Need to specify at least one of 'labels', 'index' or 'columns'" - ) - - for axis, labels in axes.items(): - if labels is not None: - if level is not None and not self._query_compiler.has_multiindex( - axis=axis - ): - # Same error as native pandas. 
- raise AssertionError("axis must be a MultiIndex") - # According to pandas documentation, a tuple will be used as a single - # label and not treated as a list-like. - if not is_list_like(labels) or isinstance(labels, tuple): - axes[axis] = [labels] - - new_query_compiler = self._query_compiler.drop( - index=axes.get(0), columns=axes.get(1), level=level, errors=errors - ) - return self._create_or_update_from_compiler(new_query_compiler, inplace) - - def _dropna( - self, - axis: Axis = 0, - how: str | NoDefault = no_default, - thresh: int | NoDefault = no_default, - subset: IndexLabel = None, - inplace: bool = False, - ): - inplace = validate_bool_kwarg(inplace, "inplace") - - if is_list_like(axis): - raise TypeError("supplying multiple axes to axis is no longer supported.") - - axis = self._get_axis_number(axis) - - if (how is not no_default) and (thresh is not no_default): - raise TypeError( - "You cannot set both the how and thresh arguments at the same time." - ) - - if how is no_default: - how = "any" - if how not in ["any", "all"]: - raise ValueError("invalid how option: %s" % how) - if subset is not None: - if axis == 1: - indices = self.index.get_indexer_for(subset) - check = indices == -1 - if check.any(): - raise KeyError(list(np.compress(check, subset))) - else: - indices = self.columns.get_indexer_for(subset) - check = indices == -1 - if check.any(): - raise KeyError(list(np.compress(check, subset))) - - new_query_compiler = self._query_compiler.dropna( - axis=axis, - how=how, - thresh=thresh, - subset=subset, - ) - return self._create_or_update_from_compiler(new_query_compiler, inplace) - - def droplevel(self, level, axis=0): # noqa: PR01, RT01, D200 - """ - Return `BasePandasDataset` with requested index / column level(s) removed. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - new_axis = self.axes[axis].droplevel(level) - result = self.copy() - if axis == 0: - result.index = new_axis - else: - result.columns = new_axis - return result - - def drop_duplicates( - self, keep="first", inplace=False, **kwargs - ): # noqa: PR01, RT01, D200 - """ - Return `BasePandasDataset` with duplicate rows removed. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - inplace = validate_bool_kwarg(inplace, "inplace") - ignore_index = kwargs.get("ignore_index", False) - subset = kwargs.get("subset", None) - if subset is not None: - if is_list_like(subset): - if not isinstance(subset, list): - subset = list(subset) - else: - subset = [subset] - df = self[subset] - else: - df = self - duplicated = df.duplicated(keep=keep) - result = self[~duplicated] - if ignore_index: - result.index = pandas.RangeIndex(stop=len(result)) - if inplace: - self._update_inplace(result._query_compiler) - else: - return result - - def mask( - self, - cond: BasePandasDataset | Callable | AnyArrayLike, - other: BasePandasDataset | Callable | Scalar | None = np.nan, - inplace: bool = False, - axis: Axis | None = None, - level: Level | None = None, - ): - """ - Replace values where the condition is True. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # TODO: https://snowflakecomputing.atlassian.net/browse/SNOW-985670 - # will move pre-processing to QC layer. 
- inplace = validate_bool_kwarg(inplace, "inplace") - if cond is None: - raise ValueError("Array conditional must be same shape as self") - - cond = apply_if_callable(cond, self) - - if isinstance(cond, Callable): - raise NotImplementedError("Do not support callable for 'cond' parameter.") - - from snowflake.snowpark.modin.pandas import Series - - if isinstance(cond, Series): - cond._query_compiler._shape_hint = "column" - if isinstance(self, Series): - self._query_compiler._shape_hint = "column" - if isinstance(other, Series): - other._query_compiler._shape_hint = "column" - - if not isinstance(cond, BasePandasDataset): - cond = get_as_shape_compatible_dataframe_or_series(cond, self) - cond._query_compiler._shape_hint = "array" - - if other is not None: - other = apply_if_callable(other, self) - - if isinstance(other, np.ndarray): - other = get_as_shape_compatible_dataframe_or_series( - other, - self, - shape_mismatch_message="other must be the same shape as self when an ndarray", - ) - other._query_compiler._shape_hint = "array" - - if isinstance(other, BasePandasDataset): - other = other._query_compiler - - query_compiler = self._query_compiler.mask( - cond._query_compiler, - other, - axis, - level, - ) - - return self._create_or_update_from_compiler(query_compiler, inplace) - - def where( - self, - cond: BasePandasDataset | Callable | AnyArrayLike, - other: BasePandasDataset | Callable | Scalar | None = np.nan, - inplace: bool = False, - axis: Axis | None = None, - level: Level | None = None, - ): - """ - Replace values where the condition is False. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # TODO: SNOW-985670: Refactor `where` and `mask` - # will move pre-processing to QC layer. - inplace = validate_bool_kwarg(inplace, "inplace") - if cond is None: - raise ValueError("Array conditional must be same shape as self") - - cond = apply_if_callable(cond, self) - - if isinstance(cond, Callable): - raise NotImplementedError("Do not support callable for 'cond' parameter.") - - from snowflake.snowpark.modin.pandas import Series - - if isinstance(cond, Series): - cond._query_compiler._shape_hint = "column" - if isinstance(self, Series): - self._query_compiler._shape_hint = "column" - if isinstance(other, Series): - other._query_compiler._shape_hint = "column" - - if not isinstance(cond, BasePandasDataset): - cond = get_as_shape_compatible_dataframe_or_series(cond, self) - cond._query_compiler._shape_hint = "array" - - if other is not None: - other = apply_if_callable(other, self) - - if isinstance(other, np.ndarray): - other = get_as_shape_compatible_dataframe_or_series( - other, - self, - shape_mismatch_message="other must be the same shape as self when an ndarray", - ) - other._query_compiler._shape_hint = "array" - - if isinstance(other, BasePandasDataset): - other = other._query_compiler - - query_compiler = self._query_compiler.where( - cond._query_compiler, - other, - axis, - level, - ) - - return self._create_or_update_from_compiler(query_compiler, inplace) - - def eq(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 - """ - Get equality of `BasePandasDataset` and `other`, element-wise (binary operator `eq`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("eq", other, axis=axis, level=level, dtypes=np.bool_) - - def explode(self, column, ignore_index: bool = False): # noqa: PR01, RT01, D200 - """ - Transform each element of a list-like to a row. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - exploded = self.__constructor__( - query_compiler=self._query_compiler.explode(column) - ) - if ignore_index: - exploded = exploded.reset_index(drop=True) - return exploded - - def ewm( - self, - com: float | None = None, - span: float | None = None, - halflife: float | TimedeltaConvertibleTypes | None = None, - alpha: float | None = None, - min_periods: int | None = 0, - adjust: bool = True, - ignore_na: bool = False, - axis: Axis = 0, - times: str | np.ndarray | BasePandasDataset | None = None, - method: str = "single", - ) -> pandas.core.window.ewm.ExponentialMovingWindow: # noqa: PR01, RT01, D200 - """ - Provide exponentially weighted (EW) calculations. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "ewm", - com=com, - span=span, - halflife=halflife, - alpha=alpha, - min_periods=min_periods, - adjust=adjust, - ignore_na=ignore_na, - axis=axis, - times=times, - method=method, - ) - - def expanding( - self, min_periods=1, axis=0, method="single" - ): # noqa: PR01, RT01, D200 - """ - Provide expanding window calculations. - """ - from .window import Expanding - - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "expanding" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - + "deprecated and will be removed in a future version. " - + f"Use obj.T.{name}(...) instead", - FutureWarning, - stacklevel=1, - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - + "deprecated and will be removed in a future version. " - + "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=1, - ) - else: - axis = 0 - - return Expanding( - self, - min_periods=min_periods, - axis=axis, - method=method, - ) - - def ffill( - self, - axis: Axis | None = None, - inplace: bool = False, - limit: int | None = None, - downcast: dict | None = None, - ): - """ - Synonym for `DataFrame.fillna` with ``method='ffill'``. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.fillna( - method="ffill", axis=axis, limit=limit, downcast=downcast, inplace=inplace - ) - - def fillna( - self, - self_is_series, - value: Hashable | Mapping | pd.Series | pd.DataFrame = None, - method: FillnaOptions | None = None, - axis: Axis | None = None, - inplace: bool = False, - limit: int | None = None, - downcast: dict | None = None, - ): - """ - Fill NA/NaN values using the specified method. - - Parameters - ---------- - self_is_series : bool - If True then self contains a Series object, if False then self contains - a DataFrame object. - value : scalar, dict, Series, or DataFrame, default: None - Value to use to fill holes (e.g. 0), alternately a - dict/Series/DataFrame of values specifying which value to use for - each index (for a Series) or column (for a DataFrame). Values not - in the dict/Series/DataFrame will not be filled. This value cannot - be a list. - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default: None - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use next valid observation to fill gap. - axis : {None, 0, 1}, default: None - Axis along which to fill missing values. - inplace : bool, default: False - If True, fill in-place. 
Note: this will modify any - other views on this object (e.g., a no-copy slice for a column in a - DataFrame). - limit : int, default: None - If method is specified, this is the maximum number of consecutive - NaN values to forward/backward fill. In other words, if there is - a gap with more than this number of consecutive NaNs, it will only - be partially filled. If method is not specified, this is the - maximum number of entries along the entire axis where NaNs will be - filled. Must be greater than 0 if not None. - downcast : dict, default: None - A dict of item->dtype of what to downcast if possible, - or the string 'infer' which will try to downcast to an appropriate - equal type (e.g. float64 to int64 if possible). - - Returns - ------- - Series, DataFrame or None - Object with missing values filled or None if ``inplace=True``. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - raise_if_native_pandas_objects(value) - inplace = validate_bool_kwarg(inplace, "inplace") - axis = self._get_axis_number(axis) - if isinstance(value, (list, tuple)): - raise TypeError( - '"value" parameter must be a scalar or dict, but ' - + f'you passed a "{type(value).__name__}"' - ) - if value is None and method is None: - # same as pandas - raise ValueError("Must specify a fill 'value' or 'method'.") - if value is not None and method is not None: - raise ValueError("Cannot specify both 'value' and 'method'.") - if method is not None and method not in ["backfill", "bfill", "pad", "ffill"]: - expecting = "pad (ffill) or backfill (bfill)" - msg = "Invalid fill method. Expecting {expecting}. Got {method}".format( - expecting=expecting, method=method - ) - raise ValueError(msg) - if limit is not None: - if not isinstance(limit, int): - raise ValueError("Limit must be an integer") - elif limit <= 0: - raise ValueError("Limit must be greater than 0") - - new_query_compiler = self._query_compiler.fillna( - self_is_series=self_is_series, - value=value, - method=method, - axis=axis, - limit=limit, - downcast=downcast, - ) - return self._create_or_update_from_compiler(new_query_compiler, inplace) - - def filter( - self, items=None, like=None, regex=None, axis=None - ): # noqa: PR01, RT01, D200 - """ - Subset the `BasePandasDataset` rows or columns according to the specified index labels. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - nkw = count_not_none(items, like, regex) - if nkw > 1: - raise TypeError( - "Keyword arguments `items`, `like`, or `regex` are mutually exclusive" - ) - if nkw == 0: - raise TypeError("Must pass either `items`, `like`, or `regex`") - if axis is None: - axis = "columns" # This is the default info axis for dataframes - - axis = self._get_axis_number(axis) - labels = self.columns if axis else self.index - - if items is not None: - bool_arr = labels.isin(items) - elif like is not None: - - def f(x): - return like in str(x) - - bool_arr = labels.map(f).tolist() - else: - - def f(x): - return matcher.search(str(x)) is not None - - matcher = re.compile(regex) - bool_arr = labels.map(f).tolist() - if not axis: - return self[bool_arr] - return self[self.columns[bool_arr]] - - def first(self, offset): # noqa: PR01, RT01, D200 - """ - Select initial periods of time series data based on a date offset. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.loc[pandas.Series(index=self.index).first(offset).index] - - def first_valid_index(self) -> Scalar | tuple[Scalar]: - """ - Return index for first non-NA value or None, if no non-NA value is found. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._query_compiler.first_valid_index() - - def floordiv( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `floordiv`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "floordiv", other, axis=axis, level=level, fill_value=fill_value - ) - - def ge(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 - """ - Get greater than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ge`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("ge", other, axis=axis, level=level, dtypes=np.bool_) - - def get(self, key, default=None): # noqa: PR01, RT01, D200 - """ - Get item from object for given key. - """ - try: - return self.__getitem__(key) - except (KeyError, ValueError, IndexError): - return default - - def gt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 - """ - Get greater than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `gt`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("gt", other, axis=axis, level=level, dtypes=np.bool_) - - def head(self, n: int = 5): - """ - Return the first `n` rows. - """ - return self.iloc[:n] - - @property - def iat(self, axis=None): # noqa: PR01, RT01, D200 - """ - Get a single value for a row/column pair by integer position. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - from .indexing import _iAtIndexer - - return _iAtIndexer(self) - - def idxmax(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 - """ - Return index of first occurrence of maximum over requested axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - dtypes = self._get_dtypes() - if ( - axis == 1 - and not numeric_only - and any(not is_numeric_dtype(d) for d in dtypes) - and len(set(dtypes)) > 1 - ): - # For numeric_only=False, if we have any non-numeric dtype, e.g. - # a string type, we need every other column to be of the same type. - # We can't compare two objects of different non-numeric types, e.g. - # a string and a timestamp. - # If we have only numeric data, we can compare columns even if they - # different types, e.g. we can compare an int column to a float - # column. - raise TypeError("'>' not supported for these dtypes") - axis = self._get_axis_number(axis) - return self._reduce_dimension( - self._query_compiler.idxmax( - axis=axis, skipna=skipna, numeric_only=numeric_only - ) - ) - - def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 - """ - Return index of first occurrence of minimum over requested axis. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - dtypes = self._get_dtypes() - if ( - axis == 1 - and not numeric_only - and any(not is_numeric_dtype(d) for d in dtypes) - and len(set(dtypes)) > 1 - ): - # For numeric_only=False, if we have any non-numeric dtype, e.g. - # a string type, we need every other column to be of the same type. - # We can't compare two objects of different non-numeric types, e.g. - # a string and a timestamp. - # If we have only numeric data, we can compare columns even if they - # different types, e.g. we can compare an int column to a float - # column. - raise TypeError("'<' not supported for these dtypes") - axis = self._get_axis_number(axis) - return self._reduce_dimension( - self._query_compiler.idxmin( - axis=axis, skipna=skipna, numeric_only=numeric_only - ) - ) - - def infer_objects( - self, copy: bool | None = None - ) -> BasePandasDataset: # pragma: no cover # noqa: RT01, D200 - """ - Attempt to infer better dtypes for object columns. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - new_query_compiler = self._query_compiler.infer_objects() - return self._create_or_update_from_compiler( - new_query_compiler, inplace=False if copy is None else not copy - ) - - def convert_dtypes( - self, - infer_objects: bool = True, - convert_string: bool = True, - convert_integer: bool = True, - convert_boolean: bool = True, - convert_floating: bool = True, - dtype_backend: DtypeBackend = "numpy_nullable", - ): # noqa: PR01, RT01, D200 - """ - Convert columns to best possible dtypes using dtypes supporting ``pd.NA``. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.__constructor__( - query_compiler=self._query_compiler.convert_dtypes( - infer_objects=infer_objects, - convert_string=convert_string, - convert_integer=convert_integer, - convert_boolean=convert_boolean, - convert_floating=convert_floating, - dtype_backend=dtype_backend, - ) - ) - - def isin( - self, values: BasePandasDataset | ListLike | dict[Hashable, ListLike] - ) -> BasePandasDataset: # noqa: PR01, RT01, D200 - """ - Whether elements in `BasePandasDataset` are contained in `values`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - - # Pass as query compiler if values is BasePandasDataset. - if isinstance(values, BasePandasDataset): - values = values._query_compiler - - # Convert non-dict values to List if values is neither List[Any] nor np.ndarray. SnowflakeQueryCompiler - # expects for the non-lazy case, where values is not a BasePandasDataset, the data to be materialized - # as list or numpy array. Because numpy may perform implicit type conversions, use here list to be more general. - elif not isinstance(values, dict) and ( - not isinstance(values, list) or not isinstance(values, np.ndarray) - ): - values = list(values) - - return self.__constructor__( - query_compiler=self._query_compiler.isin(values=values) - ) - - def isna(self): # noqa: RT01, D200 - """ - Detect missing values. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.__constructor__(query_compiler=self._query_compiler.isna()) - - isnull = isna - - @property - def iloc(self): - """ - Purely integer-location based indexing for selection by position. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # TODO: SNOW-930028 enable all skipped doctests - from .indexing import _iLocIndexer - - return _iLocIndexer(self) - - def kurt(self, axis=no_default, skipna=True, numeric_only=False, **kwargs): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - validate_bool_kwarg(skipna, "skipna", none_allowed=False) - axis = self._get_axis_number(axis) - if numeric_only is not None and not numeric_only: - self._validate_dtypes(numeric_only=True) - - data = ( - self._get_numeric_data(axis) - if numeric_only is None or numeric_only - else self - ) - - return self._reduce_dimension( - data._query_compiler.kurt( - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - ) - - kurtosis = kurt - - def last(self, offset): # noqa: PR01, RT01, D200 - """ - Select final periods of time series data based on a date offset. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.loc[pandas.Series(index=self.index).last(offset).index] - - def last_valid_index(self) -> Scalar | tuple[Scalar]: - """ - Return index for last non-NA value or None, if no non-NA value is found. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._query_compiler.last_valid_index() - - def le(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 - """ - Get less than or equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `le`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("le", other, axis=axis, level=level, dtypes=np.bool_) - - def lt(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 - """ - Get less than comparison of `BasePandasDataset` and `other`, element-wise (binary operator `lt`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("lt", other, axis=axis, level=level, dtypes=np.bool_) - - @property - def loc(self): - """ - Get a group of rows and columns by label(s) or a boolean array. - """ - # TODO: SNOW-935444 fix doctest where index key has name - # TODO: SNOW-933782 fix multiindex transpose bug, e.g., Name: (cobra, mark ii) => Name: ('cobra', 'mark ii') - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - from .indexing import _LocIndexer - - return _LocIndexer(self) - - def _agg_helper( - self, - func: str, - skipna: bool = True, - axis: int | None | NoDefault = no_default, - numeric_only: bool = False, - **kwargs: Any, - ): - if not self._is_dataframe and numeric_only and not is_numeric_dtype(self.dtype): - # Series aggregations on non-numeric data do not support numeric_only: - # https://github.com/pandas-dev/pandas/blob/cece8c6579854f6b39b143e22c11cac56502c4fd/pandas/core/series.py#L6358 - raise TypeError( - f"Series.{func} does not allow numeric_only=True with non-numeric dtypes." 
- ) - axis = self._get_axis_number(axis) - numeric_only = validate_bool_kwarg( - numeric_only, "numeric_only", none_allowed=True - ) - skipna = validate_bool_kwarg(skipna, "skipna", none_allowed=False) - agg_kwargs: dict[str, Any] = { - "numeric_only": numeric_only, - "skipna": skipna, - } - agg_kwargs.update(kwargs) - return self.aggregate(func=func, axis=axis, **agg_kwargs) - - def max( - self, - axis: Axis | None = 0, - skipna: bool = True, - numeric_only: bool = False, - **kwargs: Any, - ): - """ - Return the maximum of the values over the requested axis. - """ - return self._agg_helper( - func="max", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - - def _stat_operation( - self, - op_name: str, - axis: int | str, - skipna: bool, - numeric_only: bool = False, - **kwargs, - ): - """ - Do common statistic reduce operations under frame. - - Parameters - ---------- - op_name : str - Name of method to apply. - axis : int or str - Axis to apply method on. - skipna : bool - Exclude NA/null values when computing the result. - numeric_only : bool - Include only float, int, boolean columns. - **kwargs : dict - Additional keyword arguments to pass to `op_name`. - - Returns - ------- - scalar or Series - `scalar` - self is Series - `Series` - self is DataFrame - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if not numeric_only: - self._validate_dtypes(numeric_only=True) - - data = self._get_numeric_data(axis) if numeric_only else self - result_qc = getattr(data._query_compiler, op_name)( - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - result_qc = self._reduce_dimension(result_qc) - return result_qc - - def memory_usage(self, index=True, deep=False): # noqa: PR01, RT01, D200 - """ - Return the memory usage of the `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._reduce_dimension( - self._query_compiler.memory_usage(index=index, deep=deep) - ) - - def min( - self, - axis: Axis | None | NoDefault = no_default, - skipna: bool = True, - numeric_only: bool = False, - **kwargs, - ): - """ - Return the minimum of the values over the requested axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._agg_helper( - func="min", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - - def mod( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `mod`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "mod", other, axis=axis, level=level, fill_value=fill_value - ) - - def mode(self, axis=0, numeric_only=False, dropna=True): # noqa: PR01, RT01, D200 - """ - Get the mode(s) of each element along the selected axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - return self.__constructor__( - query_compiler=self._query_compiler.mode( - axis=axis, numeric_only=numeric_only, dropna=dropna - ) - ) - - def mul( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get multiplication of `BasePandasDataset` and `other`, element-wise (binary operator `mul`). 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "mul", other, axis=axis, level=level, fill_value=fill_value - ) - - multiply = mul - - def ne(self, other, axis="columns", level=None): # noqa: PR01, RT01, D200 - """ - Get Not equal comparison of `BasePandasDataset` and `other`, element-wise (binary operator `ne`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("ne", other, axis=axis, level=level, dtypes=np.bool_) - - def notna(self): # noqa: RT01, D200 - """ - Detect existing (non-missing) values. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.__constructor__(query_compiler=self._query_compiler.notna()) - - notnull = notna - - def nunique(self, axis=0, dropna=True): # noqa: PR01, RT01, D200 - """ - Return number of unique elements in the `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - result = self._reduce_dimension( - self._query_compiler.nunique(axis=axis, dropna=dropna) - ) - return result - - def pad( - self, - axis: Axis | None = None, - inplace: bool = False, - limit: int | None = None, - downcast: dict | None = None, - ): - """ - Synonym for `DataFrame.fillna` with ``method='ffill'``. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - warnings.warn( - "Series/DataFrame.pad is deprecated. Use Series/DataFrame.ffill instead.", - FutureWarning, - stacklevel=1, - ) - return self.fillna( - method="ffill", axis=axis, limit=limit, downcast=downcast, inplace=inplace - ) - - def pct_change( - self, periods=1, fill_method=no_default, limit=no_default, freq=None, **kwargs - ): # noqa: PR01, RT01, D200 - """ - Percentage change between the current and a prior element. - """ - if fill_method not in (lib.no_default, None) or limit is not lib.no_default: - warnings.warn( - "The 'fill_method' keyword being not None and the 'limit' keyword in " - + f"{type(self).__name__}.pct_change are deprecated and will be removed " - + "in a future version. Either fill in any non-leading NA values prior " - + "to calling pct_change or specify 'fill_method=None' to not fill NA " - + "values.", - FutureWarning, - stacklevel=1, - ) - if fill_method is lib.no_default: - warnings.warn( - f"The default fill_method='pad' in {type(self).__name__}.pct_change is " - + "deprecated and will be removed in a future version. Either fill in any " - + "non-leading NA values prior to calling pct_change or specify 'fill_method=None' " - + "to not fill NA values.", - FutureWarning, - stacklevel=1, - ) - fill_method = "pad" - - if limit is lib.no_default: - limit = None - - if "axis" in kwargs: - kwargs["axis"] = self._get_axis_number(kwargs["axis"]) - - # Attempting to match pandas error behavior here - if not isinstance(periods, int): - raise TypeError(f"periods must be an int. got {type(periods)} instead") - - # Attempting to match pandas error behavior here - for dtype in self._get_dtypes(): - if not is_numeric_dtype(dtype): - raise TypeError( - f"cannot perform pct_change on non-numeric column with dtype {dtype}" - ) - - return self.__constructor__( - query_compiler=self._query_compiler.pct_change( - periods=periods, - fill_method=fill_method, - limit=limit, - freq=freq, - **kwargs, - ) - ) - - def pipe(self, func, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Apply chainable functions that expect `BasePandasDataset`. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return pipe(self, func, *args, **kwargs) - - def pop(self, item): # noqa: PR01, RT01, D200 - """ - Return item and drop from frame. Raise KeyError if not found. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - result = self[item] - del self[item] - return result - - def pow( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `pow`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "pow", other, axis=axis, level=level, fill_value=fill_value - ) - - def quantile( - self, - q: Scalar | ListLike = 0.5, - axis: Axis = 0, - numeric_only: bool = False, - interpolation: Literal[ - "linear", "lower", "higher", "midpoint", "nearest" - ] = "linear", - method: Literal["single", "table"] = "single", - ) -> float | BasePandasDataset: - """ - Return values at the given quantile over requested axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - - # TODO - # - SNOW-1008361: support axis=1 - # - SNOW-1008367: support when q is Snowpandas DF/Series (need to require QC interface to accept QC q values) - # - SNOW-1003587: support datetime/timedelta columns - - if ( - axis == 1 - or interpolation not in ["linear", "nearest"] - or method != "single" - ): - ErrorMessage.not_implemented( - f"quantile function with parameters axis={axis}, interpolation={interpolation}, method={method} not supported" - ) - - if not numeric_only: - # If not numeric_only and columns, then check all columns are either - # numeric, timestamp, or timedelta - # Check if dtype is numeric, timedelta ("m"), or datetime ("M") - if not axis and not all( - is_numeric_dtype(t) or lib.is_np_dtype(t, "mM") - for t in self._get_dtypes() - ): - raise TypeError("can't multiply sequence by non-int of type 'float'") - # If over rows, then make sure that all dtypes are equal for not - # numeric_only - elif axis: - for i in range(1, len(self._get_dtypes())): - pre_dtype = self._get_dtypes()[i - 1] - curr_dtype = self._get_dtypes()[i] - if not is_dtype_equal(pre_dtype, curr_dtype): - raise TypeError( - "Cannot compare type '{}' with type '{}'".format( - pre_dtype, curr_dtype - ) - ) - else: - # Normally pandas returns this near the end of the quantile, but we - # can't afford the overhead of running the entire operation before - # we error. 
- if not any(is_numeric_dtype(t) for t in self._get_dtypes()): - raise ValueError("need at least one array to concatenate") - - # check that all qs are between 0 and 1 - validate_percentile(q) - axis = self._get_axis_number(axis) - query_compiler = self._query_compiler.quantiles_along_axis0( - q=q if is_list_like(q) else [q], - numeric_only=numeric_only, - interpolation=interpolation, - method=method, - ) - if is_list_like(q): - return self.__constructor__(query_compiler=query_compiler) - else: - # result is either a scalar or Series - result = self._reduce_dimension(query_compiler.transpose_single_row()) - if isinstance(result, BasePandasDataset): - result.name = q - return result - - @_inherit_docstrings(pandas.DataFrame.rank, apilink="pandas.DataFrame.rank") - def rank( - self, - axis=0, - method: str = "average", - numeric_only: bool = False, - na_option: str = "keep", - ascending: bool = True, - pct: bool = False, - ): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - return self.__constructor__( - query_compiler=self._query_compiler.rank( - axis=axis, - method=method, - numeric_only=numeric_only, - na_option=na_option, - ascending=ascending, - pct=pct, - ) - ) - - def _copy_index_metadata(self, source, destination): # noqa: PR01, RT01, D200 - """ - Copy Index metadata from `source` to `destination` inplace. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if hasattr(source, "name") and hasattr(destination, "name"): - destination.name = source.name - if hasattr(source, "names") and hasattr(destination, "names"): - destination.names = source.names - return destination - - def _ensure_index(self, index_like, axis=0): # noqa: PR01, RT01, D200 - """ - Ensure that we have an index from some index-like object. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if ( - self._query_compiler.has_multiindex(axis=axis) - and not isinstance(index_like, pandas.Index) - and is_list_like(index_like) - and len(index_like) > 0 - and isinstance(index_like[0], tuple) - ): - try: - return pandas.MultiIndex.from_tuples(index_like) - except TypeError: - # not all tuples - pass - return ensure_index(index_like) - - def reindex( - self, - index=None, - columns=None, - copy=True, - **kwargs, - ): # noqa: PR01, RT01, D200 - """ - Conform `BasePandasDataset` to new index with optional filling logic. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if kwargs.get("limit", None) is not None and kwargs.get("method", None) is None: - raise ValueError( - "limit argument only valid if doing pad, backfill or nearest reindexing" - ) - new_query_compiler = None - if index is not None: - if not isinstance(index, pandas.Index) or not index.equals(self.index): - new_query_compiler = self._query_compiler.reindex( - axis=0, labels=index, **kwargs - ) - if new_query_compiler is None: - new_query_compiler = self._query_compiler - final_query_compiler = None - if columns is not None: - if not isinstance(index, pandas.Index) or not columns.equals(self.columns): - final_query_compiler = new_query_compiler.reindex( - axis=1, labels=columns, **kwargs - ) - if final_query_compiler is None: - final_query_compiler = new_query_compiler - return self._create_or_update_from_compiler( - final_query_compiler, inplace=False if copy is None else not copy - ) - - def rename_axis( - self, - mapper=lib.no_default, - *, - index=lib.no_default, - columns=lib.no_default, - axis=0, - copy=None, - inplace=False, - ): # noqa: PR01, RT01, D200 - """ - Set the name of the axis for the index or columns. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axes = {"index": index, "columns": columns} - - if copy is None: - copy = True - - if axis is not None: - axis = self._get_axis_number(axis) - else: - axis = 0 - - inplace = validate_bool_kwarg(inplace, "inplace") - - if mapper is not lib.no_default and mapper is not None: - # Use v0.23 behavior if a scalar or list - non_mapper = is_scalar(mapper) or ( - is_list_like(mapper) and not is_dict_like(mapper) - ) - if non_mapper: - return self._set_axis_name(mapper, axis=axis, inplace=inplace) - else: - raise ValueError("Use `.rename` to alter labels with a mapper.") - else: - # Use new behavior. Means that index and/or columns is specified - result = self if inplace else self.copy(deep=copy) - - for axis in range(self.ndim): - v = axes.get(pandas.DataFrame._get_axis_name(axis)) - if v is lib.no_default: - continue - non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) - if non_mapper: - newnames = v - else: - - def _get_rename_function(mapper): - if isinstance(mapper, (dict, BasePandasDataset)): - - def f(x): - if x in mapper: - return mapper[x] - else: - return x - - else: - f = mapper - - return f - - f = _get_rename_function(v) - curnames = self.index.names if axis == 0 else self.columns.names - newnames = [f(name) for name in curnames] - result._set_axis_name(newnames, axis=axis, inplace=True) - if not inplace: - return result - - def reorder_levels(self, order, axis=0): # noqa: PR01, RT01, D200 - """ - Rearrange index levels using input order. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - new_labels = self.axes[axis].reorder_levels(order) - return self.set_axis(new_labels, axis=axis) - - def resample( - self, - rule, - axis: Axis = lib.no_default, - closed: str | None = None, - label: str | None = None, - convention: str = "start", - kind: str | None = None, - on: Level = None, - level: Level = None, - origin: str | TimestampConvertibleTypes = "start_day", - offset: TimedeltaConvertibleTypes | None = None, - group_keys=no_default, - ): # noqa: PR01, RT01, D200 - """ - Resample time-series data. 
- """ - from .resample import Resampler - - if axis is not lib.no_default: # pragma: no cover - axis = self._get_axis_number(axis) - if axis == 1: - warnings.warn( - "DataFrame.resample with axis=1 is deprecated. Do " - + "`frame.T.resample(...)` without axis instead.", - FutureWarning, - stacklevel=1, - ) - else: - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.resample is " - + "deprecated and will be removed in a future version.", - FutureWarning, - stacklevel=1, - ) - else: - axis = 0 - - return Resampler( - dataframe=self, - rule=rule, - axis=axis, - closed=closed, - label=label, - convention=convention, - kind=kind, - on=on, - level=level, - origin=origin, - offset=offset, - group_keys=group_keys, - ) - - def reset_index( - self, - level: IndexLabel = None, - drop: bool = False, - inplace: bool = False, - col_level: Hashable = 0, - col_fill: Hashable = "", - allow_duplicates=no_default, - names: Hashable | Sequence[Hashable] = None, - ): - """ - Reset the index, or a level of it. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - inplace = validate_bool_kwarg(inplace, "inplace") - if allow_duplicates is no_default: - allow_duplicates = False - new_query_compiler = self._query_compiler.reset_index( - drop=drop, - level=level, - col_level=col_level, - col_fill=col_fill, - allow_duplicates=allow_duplicates, - names=names, - ) - return self._create_or_update_from_compiler(new_query_compiler, inplace) - - def radd( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Return addition of `BasePandasDataset` and `other`, element-wise (binary operator `radd`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "radd", other, axis=axis, level=level, fill_value=fill_value - ) - - def rfloordiv( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get integer division of `BasePandasDataset` and `other`, element-wise (binary operator `rfloordiv`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "rfloordiv", other, axis=axis, level=level, fill_value=fill_value - ) - - def rmod( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get modulo of `BasePandasDataset` and `other`, element-wise (binary operator `rmod`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "rmod", other, axis=axis, level=level, fill_value=fill_value - ) - - def rmul( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get Multiplication of dataframe and other, element-wise (binary operator `rmul`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "rmul", other, axis=axis, level=level, fill_value=fill_value - ) - - def rolling( - self, - window, - min_periods: int | None = None, - center: bool = False, - win_type: str | None = None, - on: str | None = None, - axis: Axis = lib.no_default, - closed: str | None = None, - step: int | None = None, - method: str = "single", - ): # noqa: PR01, RT01, D200 - """ - Provide rolling window calculations. 
- """ - if axis is not lib.no_default: - axis = self._get_axis_number(axis) - name = "rolling" - if axis == 1: - warnings.warn( - f"Support for axis=1 in {type(self).__name__}.{name} is " - + "deprecated and will be removed in a future version. " - + f"Use obj.T.{name}(...) instead", - FutureWarning, - stacklevel=1, - ) - else: # pragma: no cover - warnings.warn( - f"The 'axis' keyword in {type(self).__name__}.{name} is " - + "deprecated and will be removed in a future version. " - + "Call the method without the axis keyword instead.", - FutureWarning, - stacklevel=1, - ) - else: - axis = 0 - - if win_type is not None: - from .window import Window - - return Window( - self, - window=window, - min_periods=min_periods, - center=center, - win_type=win_type, - on=on, - axis=axis, - closed=closed, - step=step, - method=method, - ) - from .window import Rolling - - return Rolling( - self, - window=window, - min_periods=min_periods, - center=center, - win_type=win_type, - on=on, - axis=axis, - closed=closed, - step=step, - method=method, - ) - - def round(self, decimals=0, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Round a `BasePandasDataset` to a variable number of decimal places. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # FIXME: Judging by pandas docs `*args` and `**kwargs` serves only compatibility - # purpose and does not affect the result, we shouldn't pass them to the query compiler. - return self.__constructor__( - query_compiler=self._query_compiler.round(decimals=decimals, **kwargs) - ) - - def rpow( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get exponential power of `BasePandasDataset` and `other`, element-wise (binary operator `rpow`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "rpow", other, axis=axis, level=level, fill_value=fill_value - ) - - def rsub( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `rsub`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "rsub", other, axis=axis, level=level, fill_value=fill_value - ) - - def rtruediv( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `rtruediv`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "rtruediv", other, axis=axis, level=level, fill_value=fill_value - ) - - rdiv = rtruediv - - def sample( - self, - n: int | None = None, - frac: float | None = None, - replace: bool = False, - weights: str | np.ndarray | None = None, - random_state: RandomState | None = None, - axis: Axis | None = None, - ignore_index: bool = False, - ): - """ - Return a random sample of items from an axis of object. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if self._get_axis_number(axis): - if weights is not None and isinstance(weights, str): - raise ValueError( - "Strings can only be passed to weights when sampling from rows on a DataFrame" - ) - else: - if n is None and frac is None: - n = 1 - elif n is not None and frac is not None: - raise ValueError("Please enter a value for `frac` OR `n`, not both") - else: - if n is not None: - if n < 0: - raise ValueError( - "A negative number of rows requested. Please provide `n` >= 0." - ) - if n % 1 != 0: - raise ValueError("Only integers accepted as `n` values") - else: - if frac < 0: - raise ValueError( - "A negative number of rows requested. Please provide `frac` >= 0." - ) - - query_compiler = self._query_compiler.sample( - n, frac, replace, weights, random_state, axis, ignore_index - ) - return self.__constructor__(query_compiler=query_compiler) - - def sem( - self, - axis: Axis | None = None, - skipna: bool = True, - ddof: int = 1, - numeric_only=False, - **kwargs, - ): # noqa: PR01, RT01, D200 - """ - Return unbiased standard error of the mean over requested axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._stat_operation( - "sem", axis, skipna, numeric_only, ddof=ddof, **kwargs - ) - - def mean( - self, - axis: Axis | None | NoDefault = no_default, - skipna: bool = True, - numeric_only: bool = False, - **kwargs: Any, - ): - """ - Return the mean of the values over the requested axis. - """ - return self._agg_helper( - func="mean", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - - def median( - self, - axis: Axis | None | NoDefault = no_default, - skipna: bool = True, - numeric_only: bool = False, - **kwargs: Any, - ): - """ - Return the mean of the values over the requested axis. - """ - return self._agg_helper( - func="median", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - - def set_flags( - self, *, copy: bool = False, allows_duplicate_labels: bool | None = None - ): # noqa: PR01, RT01, D200 - """ - Return a new `BasePandasDataset` with updated flags. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - pandas.DataFrame.set_flags, - copy=copy, - allows_duplicate_labels=allows_duplicate_labels, - ) - - @property - def flags(self): - return self._default_to_pandas(lambda df: df.flags) - - def shift( - self, - periods: int | Sequence[int] = 1, - freq=None, - axis: Axis = 0, - fill_value: Hashable = no_default, - suffix: str | None = None, - ) -> BasePandasDataset: - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if periods == 0 and freq is None: - # Check obvious case first, freq manipulates the index even for periods == 0 so check for it in addition. - return self.copy() - - # pandas compatible ValueError for freq='infer' - # TODO: Test as part of SNOW-1023324. 
- if freq == "infer": # pragma: no cover - if not hasattr(self, "freq") and not hasattr( # pragma: no cover - self, "inferred_freq" # pragma: no cover - ): # pragma: no cover - raise ValueError() # pragma: no cover - - axis = self._get_axis_number(axis) - - if fill_value == no_default: - fill_value = None - - new_query_compiler = self._query_compiler.shift( - periods, freq, axis, fill_value, suffix - ) - return self._create_or_update_from_compiler(new_query_compiler, False) - - def skew( - self, - axis: Axis | None | NoDefault = no_default, - skipna: bool = True, - numeric_only=True, - **kwargs, - ): # noqa: PR01, RT01, D200 - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - """ - Return unbiased skew over requested axis. - """ - return self._stat_operation("skew", axis, skipna, numeric_only, **kwargs) - - def sort_index( - self, - axis=0, - level=None, - ascending=True, - inplace=False, - kind="quicksort", - na_position="last", - sort_remaining=True, - ignore_index: bool = False, - key: IndexKeyFunc | None = None, - ): # noqa: PR01, RT01, D200 - """ - Sort object by labels (along an axis). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - # pandas throws this exception. See pandas issue #39434 - if ascending is None: - raise ValueError( - "the `axis` parameter is not supported in the pandas implementation of argsort()" - ) - axis = self._get_axis_number(axis) - inplace = validate_bool_kwarg(inplace, "inplace") - new_query_compiler = self._query_compiler.sort_index( - axis=axis, - level=level, - ascending=ascending, - kind=kind, - na_position=na_position, - sort_remaining=sort_remaining, - ignore_index=ignore_index, - key=key, - ) - return self._create_or_update_from_compiler(new_query_compiler, inplace) - - def sort_values( - self, - by, - axis=0, - ascending=True, - inplace: bool = False, - kind="quicksort", - na_position="last", - ignore_index: bool = False, - key: IndexKeyFunc | None = None, - ): # noqa: PR01, RT01, D200 - """ - Sort by the values along either axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - inplace = validate_bool_kwarg(inplace, "inplace") - ascending = validate_ascending(ascending) - if axis == 0: - # If any column is None raise KeyError (same a native pandas). - if by is None or (isinstance(by, list) and None in by): - # Same error message as native pandas. - raise KeyError(None) - if not isinstance(by, list): - by = [by] - - # Convert 'ascending' to sequence if needed. - if not isinstance(ascending, Sequence): - ascending = [ascending] * len(by) - if len(by) != len(ascending): - # Same error message as native pandas. - raise ValueError( - f"Length of ascending ({len(ascending)})" - f" != length of by ({len(by)})" - ) - - columns = self._query_compiler.columns.values.tolist() - index_names = self._query_compiler.get_index_names() - for by_col in by: - col_count = columns.count(by_col) - index_count = index_names.count(by_col) - if col_count == 0 and index_count == 0: - # Same error message as native pandas. - raise KeyError(by_col) - if col_count and index_count: - # Same error message as native pandas. - raise ValueError( - f"'{by_col}' is both an index level and a column label, which is ambiguous." - ) - if col_count > 1: - # Same error message as native pandas. 
- raise ValueError(f"The column label '{by_col}' is not unique.") - - if na_position not in get_args(NaPosition): - # Same error message as native pandas for invalid 'na_position' value. - raise ValueError(f"invalid na_position: {na_position}") - result = self._query_compiler.sort_rows_by_column_values( - by, - ascending=ascending, - kind=kind, - na_position=na_position, - ignore_index=ignore_index, - key=key, - ) - else: - result = self._query_compiler.sort_columns_by_row_values( - by, - ascending=ascending, - kind=kind, - na_position=na_position, - ignore_index=ignore_index, - key=key, - ) - return self._create_or_update_from_compiler(result, inplace) - - def std( - self, - axis: Axis | None = None, - skipna: bool = True, - ddof: int = 1, - numeric_only: bool = False, - **kwargs, - ): - """ - Return sample standard deviation over requested axis. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - kwargs.update({"ddof": ddof}) - return self._agg_helper( - func="std", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - - def sum( - self, - axis: Axis | None = None, - skipna: bool = True, - numeric_only: bool = False, - min_count: int = 0, - **kwargs: Any, - ): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - min_count = validate_int_kwarg(min_count, "min_count") - kwargs.update({"min_count": min_count}) - return self._agg_helper( - func="sum", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - - def sub( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get subtraction of `BasePandasDataset` and `other`, element-wise (binary operator `sub`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "sub", other, axis=axis, level=level, fill_value=fill_value - ) - - subtract = sub - - def swapaxes(self, axis1, axis2, copy=True): # noqa: PR01, RT01, D200 - """ - Interchange axes and swap values axes appropriately. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis1 = self._get_axis_number(axis1) - axis2 = self._get_axis_number(axis2) - if axis1 != axis2: - return self.transpose() - if copy: - return self.copy() - return self - - def swaplevel(self, i=-2, j=-1, axis=0): # noqa: PR01, RT01, D200 - """ - Swap levels `i` and `j` in a `MultiIndex`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - idx = self.index if axis == 0 else self.columns - return self.set_axis(idx.swaplevel(i, j), axis=axis) - - def tail(self, n: int = 5): - if n == 0: - return self.iloc[0:0] - return self.iloc[-n:] - - def take( - self, - indices: list | AnyArrayLike | slice, - axis: Axis = 0, - **kwargs, - ): - """ - Return the elements in the given *positional* indices along an axis. - """ - axis = self._get_axis_number(axis) - slice_obj = indices if axis == 0 else (slice(None), indices) - return self.iloc[slice_obj] - - def to_clipboard( - self, excel=True, sep=None, **kwargs - ): # pragma: no cover # noqa: PR01, RT01, D200 - """ - Copy object to the system clipboard. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas("to_clipboard", excel=excel, sep=sep, **kwargs) - - def to_csv( - self, - path_or_buf=None, - sep=",", - na_rep="", - float_format=None, - columns=None, - header=True, - index=True, - index_label=None, - mode="w", - encoding=None, - compression="infer", - quoting=None, - quotechar='"', - lineterminator=None, - chunksize=None, - date_format=None, - doublequote=True, - escapechar=None, - decimal=".", - errors: str = "strict", - storage_options: StorageOptions = None, - ): # pragma: no cover - from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( - FactoryDispatcher, - ) - - return FactoryDispatcher.to_csv( - self._query_compiler, - path_or_buf=path_or_buf, - sep=sep, - na_rep=na_rep, - float_format=float_format, - columns=columns, - header=header, - index=index, - index_label=index_label, - mode=mode, - encoding=encoding, - compression=compression, - quoting=quoting, - quotechar=quotechar, - lineterminator=lineterminator, - chunksize=chunksize, - date_format=date_format, - doublequote=doublequote, - escapechar=escapechar, - decimal=decimal, - errors=errors, - storage_options=storage_options, - ) - - def to_excel( - self, - excel_writer, - sheet_name="Sheet1", - na_rep="", - float_format=None, - columns=None, - header=True, - index=True, - index_label=None, - startrow=0, - startcol=0, - engine=None, - merge_cells=True, - encoding=no_default, - inf_rep="inf", - verbose=no_default, - freeze_panes=None, - storage_options: StorageOptions = None, - ): # pragma: no cover # noqa: PR01, RT01, D200 - """ - Write object to an Excel sheet. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "to_excel", - excel_writer, - sheet_name=sheet_name, - na_rep=na_rep, - float_format=float_format, - columns=columns, - header=header, - index=index, - index_label=index_label, - startrow=startrow, - startcol=startcol, - engine=engine, - merge_cells=merge_cells, - inf_rep=inf_rep, - freeze_panes=freeze_panes, - storage_options=storage_options, - ) - - def to_hdf( - self, path_or_buf, key, format="table", **kwargs - ): # pragma: no cover # noqa: PR01, RT01, D200 - """ - Write the contained data to an HDF5 file using HDFStore. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "to_hdf", path_or_buf, key, format=format, **kwargs - ) - - def to_json( - self, - path_or_buf=None, - orient=None, - date_format=None, - double_precision=10, - force_ascii=True, - date_unit="ms", - default_handler=None, - lines=False, - compression="infer", - index=True, - indent=None, - storage_options: StorageOptions = None, - ): # pragma: no cover # noqa: PR01, RT01, D200 - """ - Convert the object to a JSON string. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "to_json", - path_or_buf, - orient=orient, - date_format=date_format, - double_precision=double_precision, - force_ascii=force_ascii, - date_unit=date_unit, - default_handler=default_handler, - lines=lines, - compression=compression, - index=index, - indent=indent, - storage_options=storage_options, - ) - - def to_latex( - self, - buf=None, - columns=None, - col_space=None, - header=True, - index=True, - na_rep="NaN", - formatters=None, - float_format=None, - sparsify=None, - index_names=True, - bold_rows=False, - column_format=None, - longtable=None, - escape=None, - encoding=None, - decimal=".", - multicolumn=None, - multicolumn_format=None, - multirow=None, - caption=None, - label=None, - position=None, - ): # pragma: no cover # noqa: PR01, RT01, D200 - """ - Render object to a LaTeX tabular, longtable, or nested table. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "to_latex", - buf=buf, - columns=columns, - col_space=col_space, - header=header, - index=index, - na_rep=na_rep, - formatters=formatters, - float_format=float_format, - sparsify=sparsify, - index_names=index_names, - bold_rows=bold_rows, - column_format=column_format, - longtable=longtable, - escape=escape, - encoding=encoding, - decimal=decimal, - multicolumn=multicolumn, - multicolumn_format=multicolumn_format, - multirow=multirow, - caption=caption, - label=label, - position=position, - ) - - def to_markdown( - self, - buf=None, - mode: str = "wt", - index: bool = True, - storage_options: StorageOptions = None, - **kwargs, - ): # noqa: PR01, RT01, D200 - """ - Print `BasePandasDataset` in Markdown-friendly format. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "to_markdown", - buf=buf, - mode=mode, - index=index, - storage_options=storage_options, - **kwargs, - ) - - def to_pickle( - self, - path, - compression: CompressionOptions = "infer", - protocol: int = pkl.HIGHEST_PROTOCOL, - storage_options: StorageOptions = None, - ): # pragma: no cover # noqa: PR01, D200 - """ - Pickle (serialize) object to file. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - from snowflake.snowpark.modin.pandas import to_pickle - - to_pickle( - self, - path, - compression=compression, - protocol=protocol, - storage_options=storage_options, - ) - - def to_numpy( - self, - dtype: npt.DTypeLike | None = None, - copy: bool = False, - na_value: object = no_default, - **kwargs: Any, - ) -> np.ndarray: - """ - Convert the `BasePandasDataset` to a NumPy array or a Modin wrapper for NumPy array. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - if copy: - WarningMessage.ignored_argument( - operation="to_numpy", - argument="copy", - message="copy is ignored in Snowflake backend", - ) - return self._query_compiler.to_numpy( - dtype=dtype, - na_value=na_value, - **kwargs, - ) - - # TODO(williamma12): When this gets implemented, have the series one call this. - def to_period( - self, freq=None, axis=0, copy=True - ): # pragma: no cover # noqa: PR01, RT01, D200 - """ - Convert `BasePandasDataset` from DatetimeIndex to PeriodIndex. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas("to_period", freq=freq, axis=axis, copy=copy) - - def to_string( - self, - buf=None, - columns=None, - col_space=None, - header=True, - index=True, - na_rep="NaN", - formatters=None, - float_format=None, - sparsify=None, - index_names=True, - justify=None, - max_rows=None, - min_rows=None, - max_cols=None, - show_dimensions=False, - decimal=".", - line_width=None, - max_colwidth=None, - encoding=None, - ): # noqa: PR01, RT01, D200 - """ - Render a `BasePandasDataset` to a console-friendly tabular output. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "to_string", - buf=buf, - columns=columns, - col_space=col_space, - header=header, - index=index, - na_rep=na_rep, - formatters=formatters, - float_format=float_format, - sparsify=sparsify, - index_names=index_names, - justify=justify, - max_rows=max_rows, - max_cols=max_cols, - show_dimensions=show_dimensions, - decimal=decimal, - line_width=line_width, - max_colwidth=max_colwidth, - encoding=encoding, - ) - - def to_sql( - self, - name, - con, - schema=None, - if_exists="fail", - index=True, - index_label=None, - chunksize=None, - dtype=None, - method=None, - ): # noqa: PR01, D200 - """ - Write records stored in a `BasePandasDataset` to a SQL database. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - new_query_compiler = self._query_compiler - # writing the index to the database by inserting it to the DF - if index: - if not index_label: - index_label = "index" - new_query_compiler = new_query_compiler.insert(0, index_label, self.index) - # so pandas._to_sql will not write the index to the database as well - index = False - - from modin.core.execution.dispatching.factories.dispatcher import ( - FactoryDispatcher, - ) - - FactoryDispatcher.to_sql( - new_query_compiler, - name=name, - con=con, - schema=schema, - if_exists=if_exists, - index=index, - index_label=index_label, - chunksize=chunksize, - dtype=dtype, - method=method, - ) - - # TODO(williamma12): When this gets implemented, have the series one call this. - def to_timestamp( - self, freq=None, how="start", axis=0, copy=True - ): # noqa: PR01, RT01, D200 - """ - Cast to DatetimeIndex of timestamps, at *beginning* of period. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas( - "to_timestamp", freq=freq, how=how, axis=axis, copy=copy - ) - - def to_xarray(self): # noqa: PR01, RT01, D200 - """ - Return an xarray object from the `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas("to_xarray") - - def truediv( - self, other, axis="columns", level=None, fill_value=None - ): # noqa: PR01, RT01, D200 - """ - Get floating division of `BasePandasDataset` and `other`, element-wise (binary operator `truediv`). - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op( - "truediv", other, axis=axis, level=level, fill_value=fill_value - ) - - div = divide = truediv - - def truncate( - self, before=None, after=None, axis=None, copy=True - ): # noqa: PR01, RT01, D200 - """ - Truncate a `BasePandasDataset` before and after some index value. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - axis = self._get_axis_number(axis) - if ( - not self.axes[axis].is_monotonic_increasing - and not self.axes[axis].is_monotonic_decreasing - ): - raise ValueError("truncate requires a sorted index") - s = slice(*self.axes[axis].slice_locs(before, after)) - slice_obj = s if axis == 0 else (slice(None), s) - return self.iloc[slice_obj] - - def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 - """ - Call ``func`` on self producing a `BasePandasDataset` with the same axis shape as self. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - kwargs["is_transform"] = True - self._validate_function(func) - try: - result = self.agg(func, axis=axis, *args, **kwargs) - except TypeError: - raise - except Exception as err: - raise ValueError("Transform function failed") from err - try: - assert len(result) == len(self) - except Exception: - raise ValueError("transforms cannot produce aggregated results") - return result - - def tz_convert(self, tz, axis=0, level=None, copy=None): # noqa: PR01, RT01, D200 - """ - Convert tz-aware axis to target time zone. - """ - if copy is None: - copy = True - return self._create_or_update_from_compiler( - self._query_compiler.tz_convert( - tz, axis=self._get_axis_number(axis), level=level, copy=copy - ), - inplace=(not copy), - ) - - def tz_localize( - self, tz, axis=0, level=None, copy=None, ambiguous="raise", nonexistent="raise" - ): # noqa: PR01, RT01, D200 - """ - Localize tz-naive index of a `BasePandasDataset` to target time zone. - """ - if copy is None: - copy = True - return self._create_or_update_from_compiler( - self._query_compiler.tz_localize( - tz, - axis=self._get_axis_number(axis), - level=level, - copy=copy, - ambiguous=ambiguous, - nonexistent=nonexistent, - ), - inplace=(not copy), - ) - - def var( - self, - axis: Axis | None = None, - skipna: bool = True, - ddof: int = 1, - numeric_only: bool = False, - **kwargs: Any, - ): - """ - Return unbiased variance over requested axis. - """ - kwargs.update({"ddof": ddof}) - return self._agg_helper( - func="var", - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - **kwargs, - ) - - def __abs__(self): - """ - Return a `BasePandasDataset` with absolute numeric value of each element. - - Returns - ------- - BasePandasDataset - Object containing the absolute value of each element. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.abs() - - def __and__(self, other): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("__and__", other, axis=0) - - def __rand__(self, other): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("__rand__", other, axis=0) - - def __array__(self, dtype=None): - """ - Return the values as a NumPy array. - - Parameters - ---------- - dtype : str or np.dtype, optional - The dtype of returned array. - - Returns - ------- - arr : np.ndarray - NumPy representation of Modin object. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - WarningMessage.single_warning( - "Calling __array__ on a modin object materializes all data into local memory.\n" - + "Since this can be called by 3rd party libraries silently, it can lead to \n" - + "unexpected delays or high memory usage. 
Use to_pandas() or to_numpy() to do \n" - + "this once explicitly.", - ) - arr = self.to_numpy(dtype) - return arr - - def __copy__(self, deep=True): - """ - Return the copy of the `BasePandasDataset`. - - Parameters - ---------- - deep : bool, default: True - Whether the copy should be deep or not. - - Returns - ------- - BasePandasDataset - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.copy(deep=deep) - - def __deepcopy__(self, memo=None): - """ - Return the deep copy of the `BasePandasDataset`. - - Parameters - ---------- - memo : Any, optional - Deprecated parameter. - - Returns - ------- - BasePandasDataset - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.copy(deep=True) - - def __eq__(self, other): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.eq(other) - - def __finalize__(self, other, method=None, **kwargs): - """ - Propagate metadata from `other` to `self`. - - Parameters - ---------- - other : BasePandasDataset - The object from which to get the attributes that we are going - to propagate. - method : str, optional - A passed method name providing context on where `__finalize__` - was called. - **kwargs : dict - Additional keywords arguments to be passed to `__finalize__`. - - Returns - ------- - BasePandasDataset - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._default_to_pandas("__finalize__", other, method=method, **kwargs) - - def __ge__(self, right): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.ge(right) - - def __getitem__(self, key): - """ - Retrieve dataset according to `key`. - - Parameters - ---------- - key : callable, scalar, slice, str or tuple - The global row index to retrieve data from. - - Returns - ------- - BasePandasDataset - Located dataset. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - key = apply_if_callable(key, self) - # If a slice is passed in, use .iloc[key]. - if isinstance(key, slice): - if (is_integer(key.start) or key.start is None) and ( - is_integer(key.stop) or key.stop is None - ): - return self.iloc[key] - else: - return self.loc[key] - - # If the object calling getitem is a Series, only use .loc[key] to filter index. - if isinstance(self, pd.Series): - return self.loc[key] - - # Sometimes the result of a callable is a DataFrame (e.g. df[df > 0]) - use where. - elif isinstance(key, pd.DataFrame): - return self.where(cond=key) - - # If the object is a boolean list-like object, use .loc[key] to filter index. - # The if statement is structured this way to avoid calling dtype and reduce query count. - if isinstance(key, pd.Series): - if key.dtype == bool: - return self.loc[key] - elif is_list_like(key): - if hasattr(key, "dtype"): - if key.dtype == bool: - return self.loc[key] - if (all(is_bool(k) for k in key)) and len(key) > 0: - return self.loc[key] - - # In all other cases, use .loc[:, key] to filter columns. - return self.loc[:, key] - - __hash__ = None - - def __gt__(self, right): - return self.gt(right) - - def __invert__(self): - """ - Apply bitwise inverse to each element of the `BasePandasDataset`. - - Returns - ------- - BasePandasDataset - New BasePandasDataset containing bitwise inverse to each value. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.__constructor__(query_compiler=self._query_compiler.invert()) - - def __le__(self, right): - return self.le(right) - - def __len__(self) -> int: - """ - Return length of info axis. - - Returns - ------- - int - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._query_compiler.get_axis_len(axis=0) - - def __lt__(self, right): - return self.lt(right) - - def __matmul__(self, other): - """ - Compute the matrix multiplication between the `BasePandasDataset` and `other`. - - Parameters - ---------- - other : BasePandasDataset or array-like - The other object to compute the matrix product with. - - Returns - ------- - BasePandasDataset, np.ndarray or scalar - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.dot(other) - - def __ne__(self, other): - return self.ne(other) - - def __neg__(self): - """ - Change the sign for every value of self. - - Returns - ------- - BasePandasDataset - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.__constructor__( - query_compiler=self._query_compiler.unary_op("__neg__") - ) - - def __nonzero__(self): - """ - Evaluate `BasePandasDataset` as boolean object. - - Raises - ------ - ValueError - Always since truth value for self is ambiguous. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - raise ValueError( - f"The truth value of a {self.__class__.__name__} is ambiguous. " - + "Use a.empty, a.bool(), a.item(), a.any() or a.all()." - ) - - __bool__ = __nonzero__ - - def __or__(self, other): - return self._binary_op("__or__", other, axis=0) - - def __ror__(self, other): - return self._binary_op("__ror__", other, axis=0) - - def __sizeof__(self): - """ - Generate the total memory usage for an `BasePandasDataset`. - - Returns - ------- - int - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - - return self._default_to_pandas("__sizeof__") - - def __str__(self): # pragma: no cover - """ - Return str(self). - - Returns - ------- - str - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return repr(self) - - def __xor__(self, other): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("__xor__", other, axis=0) - - def __rxor__(self, other): - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self._binary_op("__rxor__", other, axis=0) - - @property - def size(self) -> int: - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return np.prod(self.shape) # type: ignore[return-value] - - @property - def values(self) -> np.ndarray: - """ - Return a NumPy representation of the `BasePandasDataset`. - """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - return self.to_numpy() - - def _repartition(self, axis: int | None = None): - """ - Repartitioning Modin objects to get ideal partitions inside. - - Allows to improve performance where the query compiler can't improve - yet by doing implicit repartitioning. - - Parameters - ---------- - axis : {0, 1, None}, optional - The axis along which the repartitioning occurs. - `None` is used for repartitioning along both axes. - - Returns - ------- - DataFrame or Series - The repartitioned dataframe or series, depending on the original type. 
- """ - # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset - allowed_axis_values = (0, 1, None) - if axis not in allowed_axis_values: - raise ValueError( - f"Passed `axis` parameter: {axis}, but should be one of {allowed_axis_values}" - ) - return self.__constructor__( - query_compiler=self._query_compiler.repartition(axis=axis) - ) - - def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): - """ - Apply the `ufunc` to the `BasePandasDataset`. - - Parameters - ---------- - ufunc : np.ufunc - The NumPy ufunc to apply. - method : str - The method to apply. - *inputs : tuple - The inputs to the ufunc. - **kwargs : dict - Additional keyword arguments. - - Returns - ------- - BasePandasDataset - The result of the ufunc applied to the `BasePandasDataset`. - """ - # Use pandas version of ufunc if it exists - if method != "__call__": - # Return sentinel value NotImplemented - return NotImplemented - from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import ( - numpy_to_pandas_universal_func_map, - ) - - if ufunc.__name__ in numpy_to_pandas_universal_func_map: - ufunc = numpy_to_pandas_universal_func_map[ufunc.__name__] - return ufunc(self, inputs[1:], kwargs) - # return the sentinel NotImplemented if we do not support this function - return NotImplemented - - def __array_function__( - self, func: callable, types: tuple, args: tuple, kwargs: dict - ): - """ - Apply the `func` to the `BasePandasDataset`. - - Parameters - ---------- - func : np.func - The NumPy func to apply. - types : tuple - The types of the args. - args : tuple - The args to the func. - kwargs : dict - Additional keyword arguments. - - Returns - ------- - BasePandasDataset - The result of the ufunc applied to the `BasePandasDataset`. - """ - from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import ( - numpy_to_pandas_func_map, - ) - - if func.__name__ in numpy_to_pandas_func_map: - return numpy_to_pandas_func_map[func.__name__](*args, **kwargs) - else: - # per NEP18 we raise NotImplementedError so that numpy can intercept - return NotImplemented # pragma: no cover diff --git a/src/snowflake/snowpark/modin/pandas/dataframe.py b/src/snowflake/snowpark/modin/pandas/dataframe.py index a7d53813779..b42ad5a04c7 100644 --- a/src/snowflake/snowpark/modin/pandas/dataframe.py +++ b/src/snowflake/snowpark/modin/pandas/dataframe.py @@ -37,6 +37,7 @@ import numpy as np import pandas from modin.pandas.accessor import CachedAccessor, SparseFrameAccessor +from modin.pandas.base import BasePandasDataset # from . 
import _update_engine from modin.pandas.iterator import PartitionIterator @@ -73,7 +74,6 @@ from pandas.util._validators import validate_bool_kwarg from snowflake.snowpark.modin import pandas as pd -from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset from snowflake.snowpark.modin.pandas.groupby import ( DataFrameGroupBy, validate_groupby_args, @@ -91,12 +91,14 @@ replace_external_data_keys_with_empty_pandas_series, replace_external_data_keys_with_query_compiler, ) +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._internal.utils import is_repr_truncated from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike from snowflake.snowpark.modin.plugin.utils.error_message import ( ErrorMessage, dataframe_not_implemented, ) +from snowflake.snowpark.modin.plugin.utils.frontend_constants import _ATTRS_NO_LOOKUP from snowflake.snowpark.modin.plugin.utils.warning_message import ( SET_DATAFRAME_ATTRIBUTE_WARNING, WarningMessage, @@ -136,7 +138,7 @@ ], apilink="pandas.DataFrame", ) -class DataFrame(BasePandasDataset): +class DataFrame(BasePandasDataset, metaclass=TelemetryMeta): _pandas_class = pandas.DataFrame def __init__( diff --git a/src/snowflake/snowpark/modin/pandas/general.py b/src/snowflake/snowpark/modin/pandas/general.py index 8d933cd6a11..df19e9eac91 100644 --- a/src/snowflake/snowpark/modin/pandas/general.py +++ b/src/snowflake/snowpark/modin/pandas/general.py @@ -22,7 +22,7 @@ """Implement pandas general API.""" from __future__ import annotations -from collections.abc import Hashable, Iterable, Mapping, Sequence +from collections.abc import Callable, Hashable, Iterable, Mapping, Sequence from datetime import date, datetime, timedelta, tzinfo from logging import getLogger from typing import TYPE_CHECKING, Any, Literal, Union @@ -30,6 +30,7 @@ import numpy as np import pandas import pandas.core.common as common +from modin.pandas.base import BasePandasDataset from pandas import IntervalIndex, NaT, Timedelta, Timestamp from pandas._libs import NaTType, lib from pandas._libs.tslibs import to_offset @@ -48,7 +49,7 @@ _infer_tz_from_endpoints, _maybe_normalize_endpoints, ) -from pandas.core.dtypes.common import is_list_like +from pandas.core.dtypes.common import is_list_like, is_nested_list_like from pandas.core.dtypes.inference import is_array_like from pandas.core.tools.datetimes import ( ArrayConvertible, @@ -61,7 +62,6 @@ # add this line to make doctests runnable from snowflake.snowpark.modin import pandas as pd # noqa: F401 -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.dataframe import DataFrame from snowflake.snowpark.modin.pandas.series import Series from snowflake.snowpark.modin.pandas.utils import ( @@ -1742,16 +1742,13 @@ def to_datetime( The default behaviour (``utc=False``) is as follows: - - Timezone-naive inputs are converted to timezone-naive :class:`~snowflake.snowpark.modin.pandas.Series`: + - Timezone-naive inputs are kept as timezone-naive :class:`~snowflake.snowpark.modin.pandas.DatetimeIndex`: - >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15']) + >>> pd.to_datetime(['2018-10-26 12:00:00', '2018-10-26 13:00:15']) DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'], dtype='datetime64[ns]', freq=None) - - Timezone-aware inputs *with constant time offset* are still converted to - timezone-naive :class:`~snowflake.snowpark.modin.pandas.Series` by default. 
- >>> pd.to_datetime(['2018-10-26 12:00:00 -0500', '2018-10-26 13:00:00 -0500']) - DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:00'], dtype='datetime64[ns]', freq=None) + DatetimeIndex(['2018-10-26 10:00:00-07:00', '2018-10-26 11:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) - Use right format to convert to timezone-aware type (Note that when call Snowpark pandas API to_pandas() the timezone-aware output will always be converted to session timezone): @@ -1763,17 +1760,17 @@ def to_datetime( issued from a timezone with daylight savings, such as Europe/Paris): >>> pd.to_datetime(['2020-10-25 02:00:00 +0200', '2020-10-25 04:00:00 +0100']) - DatetimeIndex(['2020-10-25 02:00:00', '2020-10-25 04:00:00'], dtype='datetime64[ns]', freq=None) + Index(['2020-10-24 17:00:00-07:00', '2020-10-24 20:00:00-07:00'], dtype='datetime64[ns]') >>> pd.to_datetime(['2020-10-25 02:00:00 +0200', '2020-10-25 04:00:00 +0100'], format="%Y-%m-%d %H:%M:%S %z") - DatetimeIndex(['2020-10-24 17:00:00-07:00', '2020-10-24 20:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) + Index(['2020-10-24 17:00:00-07:00', '2020-10-24 20:00:00-07:00'], dtype='datetime64[ns]') Setting ``utc=True`` makes sure always convert to timezone-aware outputs: - Timezone-naive inputs are *localized* based on the session timezone >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True) - DatetimeIndex(['2018-10-26 12:00:00-07:00', '2018-10-26 13:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) + DatetimeIndex(['2018-10-26 05:00:00-07:00', '2018-10-26 06:00:00-07:00'], dtype='datetime64[ns, America/Los_Angeles]', freq=None) - Timezone-aware inputs are *converted* to session timezone @@ -1784,8 +1781,28 @@ def to_datetime( # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py raise_if_native_pandas_objects(arg) - if arg is None: - return None # same as pandas + if not isinstance(arg, (DataFrame, Series, pd.Index)): + # use pandas.to_datetime to convert local data to datetime + res = pandas.to_datetime( + arg, + errors, + dayfirst, + yearfirst, + utc, + format, + exact, + unit, + infer_datetime_format, + origin, + cache, + ) + if isinstance(res, pandas.Series): + res = pd.Series(res) + elif not is_scalar(res): + res = pd.Index(res) + return res + + # handle modin objs if unit and unit not in VALID_TO_DATETIME_UNIT: raise ValueError(f"Unrecognized unit {unit}") @@ -1795,15 +1812,8 @@ def to_datetime( argument="cache", message="cache parameter is ignored with Snowflake backend, i.e., no caching will be applied", ) - arg_is_scalar = is_scalar(arg) - if not isinstance(arg, (DataFrame, Series, pd.Index)): - # Turn dictionary like arg into pd.DataFrame and list-like or scalar to - # pd.Index. - arg = [arg] if arg_is_scalar else arg - arg = DataFrame(arg) if isinstance(arg, dict) else pd.Index(arg) - - series_or_index = arg._to_datetime( + return arg._to_datetime( errors=errors, dayfirst=dayfirst, yearfirst=yearfirst, @@ -1814,13 +1824,6 @@ def to_datetime( infer_datetime_format=infer_datetime_format, origin=origin, ) - if arg_is_scalar: - # Calling squeeze directly on Snowpark pandas Series makes an unnecessary - # count sql call. To avoid that we convert Snowpark pandas Series to Native - # pandas series first. - # Note: When arg_is_scalar is True 'series_or_index' is always an Index. 
- return series_or_index.to_series().to_pandas().squeeze() - return series_or_index @snowpark_pandas_telemetry_standalone_function_decorator @@ -1979,8 +1982,6 @@ def melt( @snowpark_pandas_telemetry_standalone_function_decorator -@pandas_module_level_function_not_implemented() -@_inherit_docstrings(pandas.crosstab, apilink="pandas.crosstab") def crosstab( index, columns, @@ -1995,21 +1996,319 @@ def crosstab( ) -> DataFrame: # noqa: PR01, RT01, D200 """ Compute a simple cross tabulation of two (or more) factors. + + By default, computes a frequency table of the factors unless an array + of values and an aggregation function are passed. + + Parameters + ---------- + index : array-like, Series, or list of arrays/Series + Values to group by in the rows. + columns : array-like, Series, or list of arrays/Series + Values to group by in the columns. + values : array-like, optional + Array of values to aggregate according to the factors. + Requires aggfunc be specified. + rownames : sequence, default None + If passed, must match number of row arrays passed. + colnames : sequence, default None + If passed, must match number of column arrays passed. + aggfunc : function, optional + If specified, requires values be specified as well. + margins : bool, default False + Add row/column margins (subtotals). + margins_name : str, default 'All' + Name of the row/column that will contain the totals when margins is True. + dropna : bool, default True + Do not include columns whose entries are all NaN. + + normalize : bool, {'all', 'index', 'columns'}, or {0,1}, default False + Normalize by dividing all values by the sum of values. + + * If passed 'all' or True, will normalize over all values. + * If passed 'index' will normalize over each row. + * If passed 'columns' will normalize over each column. + * If margins is True, will also normalize margin values. + + Returns + ------- + Snowpark pandas :class:`~snowflake.snowpark.modin.pandas.DataFrame` + Cross tabulation of the data. + + Notes + ----- + + Raises NotImplementedError if aggfunc is not one of "count", "mean", "min", "max", or "sum", or + margins is True, normalize is True or all, and values is passed. + + Examples + -------- + >>> a = np.array(["foo", "foo", "foo", "foo", "bar", "bar", + ... "bar", "bar", "foo", "foo", "foo"], dtype=object) + >>> b = np.array(["one", "one", "one", "two", "one", "one", + ... "one", "two", "two", "two", "one"], dtype=object) + >>> c = np.array(["dull", "dull", "shiny", "dull", "dull", "shiny", + ... "shiny", "dull", "shiny", "shiny", "shiny"], + ... dtype=object) + >>> pd.crosstab(a, [b, c], rownames=['a'], colnames=['b', 'c']) # doctest: +NORMALIZE_WHITESPACE + b one two + c dull shiny dull shiny + a + bar 1 2 1 0 + foo 2 2 1 2 """ - # TODO: SNOW-1063345: Modin upgrade - modin.pandas functions in general.py - pandas_crosstab = pandas.crosstab( - index, - columns, - values, - rownames, - colnames, - aggfunc, - margins, - margins_name, - dropna, - normalize, + if values is None and aggfunc is not None: + raise ValueError("aggfunc cannot be used without values.") + + if values is not None and aggfunc is None: + raise ValueError("values cannot be used without an aggfunc.") + + if not is_nested_list_like(index): + index = [index] + if not is_nested_list_like(columns): + columns = [columns] + + if ( + values is not None + and margins is True + and (normalize is True or normalize == "all") + ): + raise NotImplementedError( + 'Snowpark pandas does not yet support passing in margins=True, normalize="all", and values.' 
+ ) + + user_passed_rownames = rownames is not None + user_passed_colnames = colnames is not None + + from pandas.core.reshape.pivot import _build_names_mapper, _get_names + + def _get_names_wrapper(list_of_objs, names, prefix): + """ + Helper method to expand DataFrame objects containing + multiple columns into Series, since `_get_names` expects + one column per entry. + """ + expanded_list_of_objs = [] + for obj in list_of_objs: + if isinstance(obj, DataFrame): + for col in obj.columns: + expanded_list_of_objs.append(obj[col]) + else: + expanded_list_of_objs.append(obj) + return _get_names(expanded_list_of_objs, names, prefix) + + rownames = _get_names_wrapper(index, rownames, prefix="row") + colnames = _get_names_wrapper(columns, colnames, prefix="col") + + ( + rownames_mapper, + unique_rownames, + colnames_mapper, + unique_colnames, + ) = _build_names_mapper(rownames, colnames) + + pass_objs = [x for x in index + columns if isinstance(x, (Series, DataFrame))] + row_idx_names = None + col_idx_names = None + if pass_objs: + # If we have any Snowpark pandas objects in the index or columns, then we + # need to find the intersection of their indices, and only pick rows from + # the objects that have indices in the intersection of their indices. + # After we do that, we then need to append the non Snowpark pandas objects + # using the intersection of indices as the final index for the DataFrame object. + # First, we separate the objects into Snowpark pandas objects, and non-Snowpark + # pandas objects (while renaming them so that they have unique names). + rownames_idx = 0 + row_idx_names = [] + dfs = [] + arrays = [] + array_lengths = [] + for obj in index: + if isinstance(obj, Series): + row_idx_names.append(obj.name) + df = pd.DataFrame(obj) + df.columns = [unique_rownames[rownames_idx]] + rownames_idx += 1 + dfs.append(df) + elif isinstance(obj, DataFrame): + row_idx_names.extend(obj.columns) + obj.columns = unique_rownames[ + rownames_idx : rownames_idx + len(obj.columns) + ] + rownames_idx += len(obj.columns) + dfs.append(obj) + else: + row_idx_names.append(None) + array_lengths.append(len(obj)) + df = pd.DataFrame(obj) + df.columns = unique_rownames[ + rownames_idx : rownames_idx + len(df.columns) + ] + rownames_idx += len(df.columns) + arrays.append(df) + + colnames_idx = 0 + col_idx_names = [] + for obj in columns: + if isinstance(obj, Series): + col_idx_names.append(obj.name) + df = pd.DataFrame(obj) + df.columns = [unique_colnames[colnames_idx]] + colnames_idx += 1 + dfs.append(df) + elif isinstance(obj, DataFrame): + col_idx_names.extend(obj.columns) + obj.columns = unique_colnames[ + colnames_idx : colnames_idx + len(obj.columns) + ] + colnames_idx += len(obj.columns) + dfs.append(obj) + else: + col_idx_names.append(None) + array_lengths.append(len(obj)) + df = pd.DataFrame(obj) + df.columns = unique_colnames[ + colnames_idx : colnames_idx + len(df.columns) + ] + colnames_idx += len(df.columns) + arrays.append(df) + + if len(set(array_lengths)) > 1: + raise ValueError("All arrays must be of the same length") + + # Now, we have two lists - a list of Snowpark pandas objects, and a list of objects + # that were not passed in as Snowpark pandas objects, but that we have converted + # to Snowpark pandas objects to give them column names. We can perform inner joins + # on the dfs list to get a DataFrame with the final index (that is only an intersection + # of indices.) 
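+        # For example, if `index` holds a Series indexed by [0, 1, 2] and `columns`
+        # holds a Series indexed by [1, 2, 3], the chained inner merges below keep
+        # only rows 1 and 2; any plain array inputs are then attached on that
+        # surviving index (subject to the length check below).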
+ df = dfs[0] + for right in dfs[1:]: + df = df.merge(right, left_index=True, right_index=True) + if len(arrays) > 0: + index = df.index + right_df = pd.concat(arrays, axis=1) + # Increases query count by 1, but necessary for error checking. + index_length = len(df) + if index_length != array_lengths[0]: + raise ValueError( + f"Length mismatch: Expected {array_lengths[0]} rows, received array of length {index_length}" + ) + right_df.index = index + df = df.merge(right_df, left_index=True, right_index=True) + else: + data = { + **dict(zip(unique_rownames, index)), + **dict(zip(unique_colnames, columns)), + } + df = DataFrame(data) + + if values is None: + df["__dummy__"] = 0 + kwargs = {"aggfunc": "count"} + else: + df["__dummy__"] = values + kwargs = {"aggfunc": aggfunc} + + table = df.pivot_table( + "__dummy__", + index=unique_rownames, + columns=unique_colnames, + margins=margins, + margins_name=margins_name, + dropna=dropna, + **kwargs, # type: ignore[arg-type] ) - return DataFrame(pandas_crosstab) + + if row_idx_names is not None and not user_passed_rownames: + table.index = table.index.set_names(row_idx_names) + + if col_idx_names is not None and not user_passed_colnames: + table.columns = table.columns.set_names(col_idx_names) + + if aggfunc is None: + # If no aggfunc is provided, we are computing frequencies. Since we use + # pivot_table above, pairs that are not observed will get a NaN value, + # so we need to fill all NaN values with 0. + table = table.fillna(0) + + # We must explicitly check that the value of normalize is not False here, + # as a valid value of normalize is `0` (for normalizing index). + if normalize is not False: + if normalize not in [0, 1, "index", "columns", "all", True]: + raise ValueError("Not a valid normalize argument") + if normalize is True: + normalize = "all" + normalize = {0: "index", 1: "columns"}.get(normalize, normalize) + + # Actual Normalizations + normalizers: dict[bool | str, Callable] = { + "all": lambda x: x / x.sum(axis=0).sum(), + "columns": lambda x: x / x.sum(), + "index": lambda x: x.div(x.sum(axis=1), axis="index"), + } + + if margins is False: + + f = normalizers[normalize] + names = table.columns.names + table = f(table) + table.columns.names = names + table = table.fillna(0) + else: + # keep index and column of pivoted table + table_index = table.index + table_columns = table.columns + + column_margin = table.iloc[:-1, -1] + + if normalize == "columns": + # keep the core table + table = table.iloc[:-1, :-1] + + # Normalize core + f = normalizers[normalize] + table = f(table) + table = table.fillna(0) + # Fix Margins + column_margin = column_margin / column_margin.sum() + table = pd.concat([table, column_margin], axis=1) + table = table.fillna(0) + table.columns = table_columns + + elif normalize == "index": + table = table.iloc[:, :-1] + + # Normalize core + f = normalizers[normalize] + table = f(table) + table = table.fillna(0).reindex(index=table_index) + + elif normalize == "all": + # Normalize core + f = normalizers[normalize] + + # When we perform the normalization function, we take the sum over + # the rows, and divide every value by the sum. Since margins is included + # though, the result of the sum is actually 2 * the sum of the original + # values (since the margin itself is the sum of the original values), + # so we need to multiply by 2 here to account for that. 
+ # The alternative would be to apply normalization to the main table + # and the index margins separately, but that would require additional joins + # to get the final table, which we want to avoid. + table = f(table.iloc[:, :-1]) * 2.0 + + column_margin = column_margin / column_margin.sum() + table = pd.concat([table, column_margin], axis=1) + table.iloc[-1, -1] = 1 + + table = table.fillna(0) + table.index = table_index + table.columns = table_columns + + table = table.rename_axis(index=rownames_mapper, axis=0) + table = table.rename_axis(columns=colnames_mapper, axis=1) + + return table # Adding docstring since pandas docs don't have web section for this function. diff --git a/src/snowflake/snowpark/modin/pandas/groupby.py b/src/snowflake/snowpark/modin/pandas/groupby.py index a373883317a..de89a48331b 100644 --- a/src/snowflake/snowpark/modin/pandas/groupby.py +++ b/src/snowflake/snowpark/modin/pandas/groupby.py @@ -49,6 +49,7 @@ create_groupby_transform_func, ) from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta +from snowflake.snowpark.modin.plugin._internal.utils import INDEX_LABEL from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import ( SnowflakeQueryCompiler, ) @@ -188,13 +189,28 @@ def sem(self, ddof=1): def value_counts( self, - subset=None, + subset: Optional[list[str]] = None, normalize: bool = False, sort: bool = True, ascending: bool = False, dropna: bool = True, ): - ErrorMessage.method_not_implemented_error(name="value_counts", class_="GroupBy") + query_compiler = self._query_compiler.groupby_value_counts( + by=self._by, + axis=self._axis, + groupby_kwargs=self._kwargs, + subset=subset, + normalize=normalize, + sort=sort, + ascending=ascending, + dropna=dropna, + ) + if self._as_index: + return pd.Series( + query_compiler=query_compiler, + name="proportion" if normalize else "count", + ) + return pd.DataFrame(query_compiler=query_compiler) def mean( self, @@ -1314,6 +1330,47 @@ def get_group(self, name, obj=None): name="get_group", class_="SeriesGroupBy" ) + def value_counts( + self, + subset: Optional[list[str]] = None, + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins: Optional[int] = None, + dropna: bool = True, + ): + # TODO: SNOW-1063349: Modin upgrade - modin.pandas.groupby.SeriesGroupBy functions + # Modin upstream defaults to pandas for this method, so we need to either override this or + # rewrite this logic to be friendlier to other backends. + # + # Unlike DataFrameGroupBy, SeriesGroupBy has an additional `bins` parameter. + qc = self._query_compiler + # The "by" list becomes the new index, which we then perform the group by on. We call + # reset_index to let the query compiler treat it as a data column so it can be grouped on. 
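+        # As an illustration, for ser.groupby(key).value_counts() the grouping key is
+        # materialized as a temporary data column, counts are computed per
+        # (key, value) pair, and the result is returned as a Series with a two-level
+        # index whose values are counts (or proportions when normalize=True).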
+ if self._by is not None: + qc = ( + qc.set_index_from_series(pd.Series(self._by)._query_compiler) + .set_index_names([INDEX_LABEL]) + .reset_index() + ) + result_qc = qc.groupby_value_counts( + by=[INDEX_LABEL], + axis=self._axis, + groupby_kwargs=self._kwargs, + subset=subset, + normalize=normalize, + sort=sort, + ascending=ascending, + bins=bins, + dropna=dropna, + ) + # Reset the names in the MultiIndex + result_qc = result_qc.set_index_names([None] * result_qc.nlevels()) + return pd.Series( + query_compiler=result_qc, + name="proportion" if normalize else "count", + ) + def validate_groupby_args( by: Any, diff --git a/src/snowflake/snowpark/modin/pandas/indexing.py b/src/snowflake/snowpark/modin/pandas/indexing.py index 0ac62f504ce..c83e3fe41c4 100644 --- a/src/snowflake/snowpark/modin/pandas/indexing.py +++ b/src/snowflake/snowpark/modin/pandas/indexing.py @@ -43,6 +43,7 @@ import numpy as np import pandas +from modin.pandas.base import BasePandasDataset from pandas._libs.tslibs import Resolution, parsing from pandas._typing import AnyArrayLike, Scalar from pandas.api.types import is_bool, is_list_like @@ -58,7 +59,6 @@ import snowflake.snowpark.modin.pandas as pd import snowflake.snowpark.modin.pandas.utils as frontend_utils -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.dataframe import DataFrame from snowflake.snowpark.modin.pandas.series import ( SERIES_SETITEM_LIST_LIKE_KEY_AND_RANGE_LIKE_VALUE_ERROR_MESSAGE, diff --git a/src/snowflake/snowpark/modin/pandas/series.py b/src/snowflake/snowpark/modin/pandas/series.py index 1ce3ecfc997..6e1b93437a8 100644 --- a/src/snowflake/snowpark/modin/pandas/series.py +++ b/src/snowflake/snowpark/modin/pandas/series.py @@ -31,6 +31,7 @@ import numpy.typing as npt import pandas from modin.pandas.accessor import CachedAccessor, SparseAccessor +from modin.pandas.base import BasePandasDataset from modin.pandas.iterator import PartitionIterator from pandas._libs.lib import NoDefault, is_integer, no_default from pandas._typing import ( @@ -51,17 +52,18 @@ from pandas.core.series import _coerce_method from pandas.util._validators import validate_bool_kwarg -from snowflake.snowpark.modin.pandas.base import _ATTRS_NO_LOOKUP, BasePandasDataset from snowflake.snowpark.modin.pandas.utils import ( from_pandas, is_scalar, try_convert_index_to_native, ) +from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._typing import DropKeep, ListLike from snowflake.snowpark.modin.plugin.utils.error_message import ( ErrorMessage, series_not_implemented, ) +from snowflake.snowpark.modin.plugin.utils.frontend_constants import _ATTRS_NO_LOOKUP from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from snowflake.snowpark.modin.utils import ( MODIN_UNNAMED_SERIES_LABEL, @@ -108,7 +110,7 @@ ], apilink="pandas.Series", ) -class Series(BasePandasDataset): +class Series(BasePandasDataset, metaclass=TelemetryMeta): _pandas_class = pandas.Series __array_priority__ = pandas.Series.__array_priority__ diff --git a/src/snowflake/snowpark/modin/pandas/utils.py b/src/snowflake/snowpark/modin/pandas/utils.py index f971e0ff964..32702c8b1a4 100644 --- a/src/snowflake/snowpark/modin/pandas/utils.py +++ b/src/snowflake/snowpark/modin/pandas/utils.py @@ -170,10 +170,9 @@ def is_scalar(obj): bool True if given object is scalar and False otherwise. 
""" + from modin.pandas.base import BasePandasDataset from pandas.api.types import is_scalar as pandas_is_scalar - from .base import BasePandasDataset - return not isinstance(obj, BasePandasDataset) and pandas_is_scalar(obj) diff --git a/src/snowflake/snowpark/modin/plugin/__init__.py b/src/snowflake/snowpark/modin/plugin/__init__.py index a76b9fe1613..c4172f26696 100644 --- a/src/snowflake/snowpark/modin/plugin/__init__.py +++ b/src/snowflake/snowpark/modin/plugin/__init__.py @@ -63,15 +63,23 @@ import modin.utils # type: ignore[import] # isort: skip # noqa: E402 import modin.pandas.series_utils # type: ignore[import] # isort: skip # noqa: E402 -modin.utils._inherit_docstrings( - docstrings.series_utils.StringMethods, - overwrite_existing=True, -)(modin.pandas.series_utils.StringMethods) - -modin.utils._inherit_docstrings( - docstrings.series_utils.CombinedDatetimelikeProperties, - overwrite_existing=True, -)(modin.pandas.series_utils.DatetimeProperties) +# TODO: SNOW-1643979 pull in fixes for +# https://github.com/modin-project/modin/issues/7113 and https://github.com/modin-project/modin/issues/7134 +# Upstream Modin has issues with certain docstring generation edge cases, so we should use our version instead +_inherit_docstrings = snowflake.snowpark.modin.utils._inherit_docstrings + +inherit_modules = [ + (docstrings.base.BasePandasDataset, modin.pandas.base.BasePandasDataset), + (docstrings.series_utils.StringMethods, modin.pandas.series_utils.StringMethods), + ( + docstrings.series_utils.CombinedDatetimelikeProperties, + modin.pandas.series_utils.DatetimeProperties, + ), +] + +for (doc_module, target_object) in inherit_modules: + _inherit_docstrings(doc_module, overwrite_existing=True)(target_object) + # Don't warn the user about our internal usage of private preview pivot # features. The user should have already been warned that Snowpark pandas diff --git a/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py index 846f3c64079..457bd388f2b 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py @@ -320,12 +320,26 @@ def _create_internal_frame_with_join_or_align_result( ) index_column_types.extend(right.cached_index_column_snowpark_pandas_types) + # If the result ordering column has the same ordering columns as the original left ordering columns, + # that means the original left and right shares the same base, and no actual snowpark join is applied because + # the join is applied on the ordering column or align on the same column. + # This behavior is guaranteed by the align and join methods provided by the OrderingDataframe, when the + # snowpark join is actually applied, the result ordering column will be a combination of + # left.ordering_column and right.ordering_column, plus some assist column. For example, the ordering column + # of left join is left.ordering_column + right.ordering_column. 
+ no_join_applied = ( + result_ordered_frame.ordering_columns == left.ordered_dataframe.ordering_columns + ) + if key_coalesce_config: coalesce_column_identifiers = [] coalesce_column_values = [] for origin_left_col, origin_right_col, coalesce_config in zip( left_on, right_on, key_coalesce_config ): + if coalesce_config == JoinKeyCoalesceConfig.NONE: + continue + coalesce_col_type = None origin_left_col_type = ( left.snowflake_quoted_identifier_to_snowpark_pandas_type[ @@ -337,44 +351,60 @@ def _create_internal_frame_with_join_or_align_result( origin_right_col ] ) - if coalesce_config == JoinKeyCoalesceConfig.NONE: - continue + left_col = result_helper.map_left_quoted_identifiers([origin_left_col])[0] right_col = result_helper.map_right_quoted_identifiers([origin_right_col])[ 0 ] - # Coalescing is only required for 'outer' or 'asof' joins or align. - # For 'inner' and 'left' join we use left join keys and for 'right' join we - # use right join keys. - # For 'left' and 'coalesce' align we use left join keys. - if how in ("asof", "outer"): - # Generate an expression equivalent of - # "COALESCE('left_col', 'right_col') as 'left_col'" - coalesce_column_identifier = ( - result_ordered_frame.generate_snowflake_quoted_identifiers( - pandas_labels=[ - extract_pandas_label_from_snowflake_quoted_identifier( - left_col - ) - ], - )[0] - ) - coalesce_column_identifiers.append(coalesce_column_identifier) - coalesce_column_values.append(coalesce(left_col, right_col)) - if origin_left_col_type == origin_right_col_type: - coalesce_col_type = origin_left_col_type - elif how == "right": - # No coalescing required for 'right' join. Simply use right join key - # as output column. - coalesce_column_identifier = right_col - coalesce_col_type = origin_right_col_type - elif how in ("inner", "left", "coalesce"): - # No coalescing required for 'left' or 'inner' join and for 'left' or - # 'coalesce' align. Simply use left join key as output column. + + if no_join_applied and origin_left_col == origin_right_col: + # if no join is applied, that means the result dataframe, left dataframe and right dataframe + # shares the same base dataframe. If the original left column and original right column are the + # same column, no coalesce is needed, and we always tries to keep the left column to stay align + # with the original dataframe as much as possible to increase the chance for optimization for + # later operations, especially when the later operations are applied with dfs coming from + # the ame dataframe. + # Keep left column can help stay aligned with the original dataframe is because when there are + # conflict between left and right, deduplication always happens at right. For example, when join + # or align left dataframe [col1, col2] and right dataframe [col1, col2], the result dataframe will + # have columns [col1, col2, col1_a12b, col2_de3b], where col1_a12b, col2_de3b are just alias of + # col1 and col2 in right dataframe. + coalesce_config = JoinKeyCoalesceConfig.LEFT coalesce_column_identifier = left_col coalesce_col_type = origin_left_col_type else: - raise AssertionError(f"Unsupported join/align type {how}") + # Coalescing is only required for 'outer' or 'asof' joins or align. + # For 'inner' and 'left' join we use left join keys and for 'right' join we + # use right join keys. + # For 'left' and 'coalesce' align we use left join keys. 
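+                # These rules match the behavior prior to the no_join_applied
+                # shortcut above and depend only on the join/align type.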
+ if how in ("asof", "outer"): + # Generate an expression equivalent of + # "COALESCE('left_col', 'right_col') as 'left_col'" + coalesce_column_identifier = ( + result_ordered_frame.generate_snowflake_quoted_identifiers( + pandas_labels=[ + extract_pandas_label_from_snowflake_quoted_identifier( + left_col + ) + ], + )[0] + ) + coalesce_column_identifiers.append(coalesce_column_identifier) + coalesce_column_values.append(coalesce(left_col, right_col)) + if origin_left_col_type == origin_right_col_type: + coalesce_col_type = origin_left_col_type + elif how == "right": + # No coalescing required for 'right' join. Simply use right join key + # as output column. + coalesce_column_identifier = right_col + coalesce_col_type = origin_right_col_type + elif how in ("inner", "left", "coalesce"): + # No coalescing required for 'left' or 'inner' join and for 'left' or + # 'coalesce' align. Simply use left join key as output column. + coalesce_column_identifier = left_col + coalesce_col_type = origin_left_col_type + else: + raise AssertionError(f"Unsupported join/align type {how}") if coalesce_config == JoinKeyCoalesceConfig.RIGHT: # swap left_col and right_col @@ -1187,15 +1217,8 @@ def align( # NULL NULL 2 NULL 4 e 2 coalesce_key_config = None inherit_join_index = InheritJoinIndex.FROM_LEFT - # When it is `outer` align, we need to coalesce the align columns. However, if the - # ordering columns of aligned result is the same as the left frame, that means the - # join columns of left and right matches, then there is no need to coalesce the join - # keys, simply inherent from left gives the correct result. - # Retaining the original columns also helps avoid unnecessary join in later steps. - if ( - how == "outer" - and aligned_ordered_frame.ordering_columns != left.ordering_columns - ): + # When it is `outer` align, we need to coalesce the align columns. + if how == "outer": coalesce_key_config = [JoinKeyCoalesceConfig.LEFT] * len(left_on) inherit_join_index = InheritJoinIndex.FROM_BOTH ( diff --git a/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py b/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py index 0a022b0d588..8057cf93885 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py @@ -495,6 +495,49 @@ def wrap(*args, **kwargs): # type: ignore } +def try_add_telemetry_to_attribute(attr_name: str, attr_value: Any) -> Any: + """ + Attempts to add telemetry to an attribute. + + If the attribute is callable with name in TELEMETRY_PRIVATE_METHODS, or is a callable that + starts with an underscore, the original attribute will be returned as-is. Otherwise, a version + of the method/property annotated with Snowpark pandas telemetry is returned. + """ + if callable(attr_value) and ( + not attr_name.startswith("_") or (attr_name in TELEMETRY_PRIVATE_METHODS) + ): + return snowpark_pandas_telemetry_method_decorator(attr_value) + elif isinstance(attr_value, property): + # wrap on getter and setter + return property( + snowpark_pandas_telemetry_method_decorator( + cast( + # add a cast because mypy doesn't recognize that + # non-None fget and __get__ are both callable + # arguments to snowpark_pandas_telemetry_method_decorator. + Callable, + attr_value.__get__ # pragma: no cover: we don't encounter this case in pandas or modin because every property has an fget method. 
+ if attr_value.fget is None + else attr_value.fget, + ), + property_name=attr_name, + property_method_type=PropertyMethodType.FGET, + ), + snowpark_pandas_telemetry_method_decorator( + attr_value.__set__ if attr_value.fset is None else attr_value.fset, + property_name=attr_name, + property_method_type=PropertyMethodType.FSET, + ), + snowpark_pandas_telemetry_method_decorator( + attr_value.__delete__ if attr_value.fdel is None else attr_value.fdel, + property_name=attr_name, + property_method_type=PropertyMethodType.FDEL, + ), + doc=attr_value.__doc__, + ) + return attr_value + + class TelemetryMeta(type): def __new__( cls, name: str, bases: tuple, attrs: dict[str, Any] @@ -536,43 +579,5 @@ def __new__( The modified class with decorated methods. """ for attr_name, attr_value in attrs.items(): - if callable(attr_value) and ( - not attr_name.startswith("_") - or (attr_name in TELEMETRY_PRIVATE_METHODS) - ): - attrs[attr_name] = snowpark_pandas_telemetry_method_decorator( - attr_value - ) - elif isinstance(attr_value, property): - # wrap on getter and setter - attrs[attr_name] = property( - snowpark_pandas_telemetry_method_decorator( - cast( - # add a cast because mypy doesn't recognize that - # non-None fget and __get__ are both callable - # arguments to snowpark_pandas_telemetry_method_decorator. - Callable, - attr_value.__get__ # pragma: no cover: we don't encounter this case in pandas or modin because every property has an fget method. - if attr_value.fget is None - else attr_value.fget, - ), - property_name=attr_name, - property_method_type=PropertyMethodType.FGET, - ), - snowpark_pandas_telemetry_method_decorator( - attr_value.__set__ - if attr_value.fset is None - else attr_value.fset, - property_name=attr_name, - property_method_type=PropertyMethodType.FSET, - ), - snowpark_pandas_telemetry_method_decorator( - attr_value.__delete__ - if attr_value.fdel is None - else attr_value.fdel, - property_name=attr_name, - property_method_type=PropertyMethodType.FDEL, - ), - doc=attr_value.__doc__, - ) + attrs[attr_name] = try_add_telemetry_to_attribute(attr_name, attr_value) return type.__new__(cls, name, bases, attrs) diff --git a/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py index 4860baf4acb..c4873724789 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/timestamp_utils.py @@ -21,9 +21,9 @@ cast, convert_timezone, date_part, - floor, iff, to_decimal, + trunc, ) from snowflake.snowpark.modin.plugin._internal.utils import pandas_lit from snowflake.snowpark.modin.plugin.utils.error_message import ErrorMessage @@ -123,6 +123,11 @@ the specified time units. """ +AUTO_FORMAT_WARNING_MSG = """Snowflake automatic format detection is used when a format is not provided. +In this case Snowflake's auto format may yield different result values compared to pandas. +See https://docs.snowflake.com/en/sql-reference/date-time-input-output#supported-formats-for-auto-detection for details +""" + # TODO: SNOW-1127160: support other units VALID_TO_DATETIME_UNIT = ["D", "s", "ms", "us", "ns"] @@ -171,7 +176,7 @@ def col_to_timedelta(col: Column, unit: str) -> Column: if not td_unit: # Same error as native pandas. 
raise ValueError(f"invalid unit abbreviation: {unit}") - return cast(floor(col * TIMEDELTA_UNIT_MULTIPLIER[td_unit]), LongType()) + return trunc(col * TIMEDELTA_UNIT_MULTIPLIER[td_unit]) PANDAS_DATETIME_FORMAT_TO_SNOWFLAKE_MAPPING = { @@ -304,9 +309,7 @@ def generate_timestamp_col( if isinstance(datatype, (StringType, VariantType)): WarningMessage.mismatch_with_pandas( "to_datetime", - "Snowpark pandas to_datetime uses Snowflake's automatic format " - "detection to convert string to datetime when a format is not provided. " - "In this case Snowflake's auto format may yield different result values compared to pandas.", + AUTO_FORMAT_WARNING_MSG.replace("\n", ""), ) from snowflake.snowpark.modin.plugin._internal.type_utils import ( diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index bbebbec1783..848c5e438b3 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -12,7 +12,7 @@ import uuid from collections.abc import Hashable, Iterable, Mapping, Sequence from datetime import timedelta, tzinfo -from typing import Any, Callable, List, Literal, Optional, Tuple, Union, get_args +from typing import Any, Callable, List, Literal, Optional, Union, get_args import numpy as np import numpy.typing as npt @@ -135,6 +135,7 @@ to_variant, translate, trim, + trunc, uniform, upper, when, @@ -196,7 +197,10 @@ compute_bin_indices, preprocess_bins_for_cut, ) -from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame +from snowflake.snowpark.modin.plugin._internal.frame import ( + InternalFrame, + LabelIdentifierPair, +) from snowflake.snowpark.modin.plugin._internal.groupby_utils import ( GROUPBY_AGG_PRESERVES_SNOWPARK_PANDAS_TYPE, GROUPBY_AGG_WITH_NONE_SNOWPARK_PANDAS_TYPES, @@ -382,12 +386,20 @@ SUPPORTED_DT_FLOOR_CEIL_FREQS = ["day", "hour", "minute", "second"] +SECONDS_PER_DAY = 86400 +NANOSECONDS_PER_SECOND = 10**9 +NANOSECONDS_PER_MICROSECOND = 10**3 +MICROSECONDS_PER_SECOND = 10**6 +NANOSECONDS_PER_DAY = SECONDS_PER_DAY * NANOSECONDS_PER_SECOND + class SnowflakeQueryCompiler(BaseQueryCompiler): """based on: https://modin.readthedocs.io/en/0.11.0/flow/modin/backends/base/query_compiler.html this class is best explained by looking at https://github.com/modin-project/modin/blob/a8be482e644519f2823668210cec5cf1564deb7e/modin/experimental/core/storage_formats/hdk/query_compiler.py """ + lazy_execution = True + def __init__(self, frame: InternalFrame) -> None: """this stores internally a local pandas object (refactor this)""" assert frame is not None and isinstance( @@ -767,6 +779,7 @@ def execute(self) -> None: def to_numpy( self, dtype: Optional[npt.DTypeLike] = None, + copy: Optional[bool] = False, na_value: object = lib.no_default, **kwargs: Any, ) -> np.ndarray: @@ -774,6 +787,12 @@ def to_numpy( # i.e., for something like df.values internally to_numpy().flatten() is called # with flatten being another query compiler call into the numpy frontend layer. # here it's overwritten to actually perform numpy conversion, i.e. 
return an actual numpy object + if copy: + WarningMessage.ignored_argument( + operation="to_numpy", + argument="copy", + message="copy is ignored in Snowflake backend", + ) return self.to_pandas().to_numpy(dtype=dtype, na_value=na_value, **kwargs) def repartition(self, axis: Any = None) -> "SnowflakeQueryCompiler": @@ -1400,17 +1419,6 @@ def cache_result(self) -> "SnowflakeQueryCompiler": """ return SnowflakeQueryCompiler(self._modin_frame.persist_to_temporary_table()) - @property - def columns(self) -> native_pd.Index: - """ - Get pandas column labels. - - Returns: - an index containing all pandas column labels - """ - # TODO SNOW-837664: add more tests for df.columns - return self._modin_frame.data_columns_index - @snowpark_pandas_type_immutable_check def set_columns(self, new_pandas_labels: Axes) -> "SnowflakeQueryCompiler": """ @@ -1465,6 +1473,12 @@ def set_columns(self, new_pandas_labels: Axes) -> "SnowflakeQueryCompiler": ) return SnowflakeQueryCompiler(new_internal_frame) + # TODO SNOW-837664: add more tests for df.columns + def get_columns(self) -> native_pd.Index: + return self._modin_frame.data_columns_index + + columns: native_pd.Index = property(get_columns, set_columns) + def _shift_values( self, periods: int, axis: Union[Literal[0], Literal[1]], fill_value: Hashable ) -> "SnowflakeQueryCompiler": @@ -2272,9 +2286,82 @@ def reindex( else: return self._reindex_axis_1(labels=labels, **kwargs) + def is_monotonic_decreasing(self) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + decreasing values in the first data column of this QueryCompiler. + + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically decreasing. + """ + return self._check_monotonic(increasing=False) + + def is_monotonic_increasing(self) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + increasing values in the first data column of this QueryCompiler. + + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically increasing. + """ + return self._check_monotonic(increasing=True) + + def _check_monotonic(self, increasing: bool) -> "SnowflakeQueryCompiler": + """ + Returns a QueryCompiler containing only a column that checks for monotonically + decreasing or increasing values (depending on `increasing`) in the first data column of this QueryCompiler. + + Parameters + ---------- + increasing: bool + Whether to check for monotonically increasing or decreasing values. + + Returns + ------- + SnowflakeQueryCompiler + QueryCompiler with column to ascertain whether data is monotonically decreasing/increasing. 
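Conceptually, the lag-based window check built below can be pictured with this native-pandas sketch (an illustration only, not the Snowpark implementation); note the non-strict comparisons, matching the `>=`/`<=` used in the query compiler:

    import pandas as pd

    s = pd.Series([1, 2, 2, 3])
    # Compare each value with its predecessor (the window LAG) and require the
    # non-strict comparison to hold for every row.
    increasing = bool((s >= s.shift(1)).iloc[1:].all())
    decreasing = bool((s <= s.shift(1)).iloc[1:].all())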
+ """ + col_to_check = self._modin_frame.data_column_snowflake_quoted_identifiers[0] + ( + new_qc, + monotonic_increasing_snowflake_quoted_identifier, + monotonic_decreasing_snowflake_quoted_identifier, + ) = self._add_columns_for_monotonicity_checks( + col_to_check=col_to_check, + columns_to_add="increasing" if increasing else "decreasing", + ) + data_column_snowflake_quoted_identifiers = [] + if increasing: + data_column_snowflake_quoted_identifiers.append( + monotonic_increasing_snowflake_quoted_identifier + ) + else: + data_column_snowflake_quoted_identifiers.append( + monotonic_decreasing_snowflake_quoted_identifier + ) + new_modin_frame = new_qc._modin_frame + return SnowflakeQueryCompiler( + InternalFrame.create( + ordered_dataframe=new_modin_frame.ordered_dataframe.limit( + n=1, sort=False + ), + data_column_pandas_index_names=new_modin_frame.data_column_pandas_index_names, + data_column_pandas_labels=["monotonic_column"], + data_column_snowflake_quoted_identifiers=data_column_snowflake_quoted_identifiers, + index_column_pandas_labels=new_modin_frame.index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=new_modin_frame.index_column_snowflake_quoted_identifiers, + data_column_types=None, + index_column_types=None, + ) + ) + def _add_columns_for_monotonicity_checks( - self, col_to_check: str - ) -> tuple["SnowflakeQueryCompiler", str, str]: + self, col_to_check: str, columns_to_add: Optional[str] = None + ) -> tuple["SnowflakeQueryCompiler", Optional[str], Optional[str]]: """ Adds columns that check for monotonicity (increasing or decreasing) in the specified column. @@ -2283,6 +2370,8 @@ def _add_columns_for_monotonicity_checks( ---------- col_to_check : str The Snowflake quoted identifier for the column whose monotonicity to check. + columns_to_add : str, optional + Whether or not to add all columns, and if not, which columns to add. 
Returns ------- @@ -2293,9 +2382,16 @@ def _add_columns_for_monotonicity_checks( """ self._raise_not_implemented_error_for_timedelta() + assert columns_to_add in [ + None, + "increasing", + "decreasing", + ], "Invalid value passed to function" modin_frame = self._modin_frame modin_frame = modin_frame.ensure_row_position_column() row_position_column = modin_frame.row_position_snowflake_quoted_identifier + monotonic_decreasing_snowflake_quoted_id = None + monotonic_increasing_snowflake_quoted_id = None modin_frame = modin_frame.append_column( "_index_lag_col", lag(col_to_check).over(Window.order_by(row_position_column)), @@ -2303,26 +2399,28 @@ def _add_columns_for_monotonicity_checks( lag_col_snowflake_quoted_id = ( modin_frame.data_column_snowflake_quoted_identifiers[-1] ) - modin_frame = modin_frame.append_column( - "_is_monotonic_decreasing", - coalesce( - min_(col(col_to_check) < col(lag_col_snowflake_quoted_id)).over(), - pandas_lit(False), - ), - ) - monotonic_decreasing_snowflake_quoted_id = ( - modin_frame.data_column_snowflake_quoted_identifiers[-1] - ) - modin_frame = modin_frame.append_column( - "_is_monotonic_increasing", - coalesce( - min_(col(col_to_check) > col(lag_col_snowflake_quoted_id)).over(), - pandas_lit(False), - ), - ) - monotonic_increasing_snowflake_quoted_id = ( - modin_frame.data_column_snowflake_quoted_identifiers[-1] - ) + if columns_to_add in [None, "decreasing"]: + modin_frame = modin_frame.append_column( + "_is_monotonic_decreasing", + coalesce( + min_(col(col_to_check) <= col(lag_col_snowflake_quoted_id)).over(), + pandas_lit(False), + ), + ) + monotonic_decreasing_snowflake_quoted_id = ( + modin_frame.data_column_snowflake_quoted_identifiers[-1] + ) + if columns_to_add in [None, "increasing"]: + modin_frame = modin_frame.append_column( + "_is_monotonic_increasing", + coalesce( + min_(col(col_to_check) >= col(lag_col_snowflake_quoted_id)).over(), + pandas_lit(False), + ), + ) + monotonic_increasing_snowflake_quoted_id = ( + modin_frame.data_column_snowflake_quoted_identifiers[-1] + ) data_column_pandas_labels = modin_frame.data_column_pandas_labels data_column_snowflake_quoted_identifiers = ( modin_frame.data_column_snowflake_quoted_identifiers @@ -2807,6 +2905,8 @@ def reset_index( Returns: A new SnowflakeQueryCompiler instance with updated index. """ + if allow_duplicates is no_default: + allow_duplicates = False # These levels will be moved from index columns to data columns levels_to_be_reset = self._modin_frame.parse_levels_to_integer_levels( level, allow_duplicates=False @@ -3007,9 +3107,11 @@ def first_last_valid_index( def sort_index( self, + *, axis: int, level: Optional[list[Union[str, int]]], ascending: Union[bool, list[bool]], + inplace: bool = False, kind: SortKind, na_position: NaPosition, sort_remaining: bool, @@ -3025,6 +3127,8 @@ def sort_index( level: If not None, sort on values in specified index level(s). ascending: A list of bools to represent ascending vs descending sort. Defaults to True. When the index is a MultiIndex the sort direction can be controlled for each level individually. + inplace: Whether or not the sort occurs in-place. This argument is ignored and only provided + for compatibility with Modin. kind: Choice of sorting algorithm. Perform stable sort if 'stable'. Defaults to unstable sort. Snowpark pandas ignores choice of sorting algorithm except 'stable'. na_position: Puts NaNs at the beginning if 'first'; 'last' puts NaNs at the end. 
Defaults to 'last' @@ -5024,6 +5128,161 @@ def groupby_all( drop=drop, ) + def groupby_value_counts( + self, + by: Any, + axis: int, + groupby_kwargs: dict[str, Any], + subset: Optional[list[str]], + normalize: bool = False, + sort: bool = True, + ascending: bool = False, + bins: Optional[int] = None, + dropna: bool = True, + ) -> "SnowflakeQueryCompiler": + level = groupby_kwargs.get("level", None) + as_index = groupby_kwargs.get("as_index", True) + groupby_sort = groupby_kwargs.get("sort", True) + is_supported = check_is_groupby_supported_by_snowflake(by, level, axis) + if not is_supported: + ErrorMessage.not_implemented( + f"Snowpark pandas GroupBy.value_counts {_GROUPBY_UNSUPPORTED_GROUPING_MESSAGE}" + ) + if bins is not None: + raise ErrorMessage.not_implemented("bins argument is not yet supported") + if not is_list_like(by): + by = [by] + if len(set(by) & set(subset or [])): + # Check for overlap between by and subset. Since column names may contain customer data, + # unlike pandas, we do not include the offending labels in the error message. + raise ValueError("Keys in subset cannot be in the groupby column keys") + if subset is not None: + subset_list = subset + else: + # If subset is unspecified, then all columns should be included. + subset_list = self._modin_frame.data_column_pandas_labels + # The grouping columns are always included in the subset. + # Furthermore, the columns of the output must have the grouping columns first, in the order + # that they were specified. + subset_list = by + list(filter(lambda label: label not in by, subset_list)) + + if as_index: + # When as_index=True, the result is a Series with a MultiIndex index. + result = self._value_counts_groupby( + by=subset_list, + # Use sort=False to preserve the original order + sort=False, + normalize=normalize, + ascending=False, + dropna=dropna, + normalize_within_groups=by, + ) + else: + # When as_index=False, the result is a DataFrame where count/proportion is appended as a new named column. + result = self._value_counts_groupby( + by=subset_list, + # Use sort=False to preserve the original order + sort=False, + normalize=normalize, + ascending=False, + dropna=dropna, + normalize_within_groups=by, + ).reset_index() + result = result.set_columns( + result._modin_frame.data_column_pandas_labels[:-1] + + ["proportion" if normalize else "count"] + ) + # pandas currently provides the following behaviors based on the different sort flags. + # These behaviors are not entirely consistent with documentation; see this issue for discussion: + # https://github.com/pandas-dev/pandas/issues/59307 + # + # Example data (using pandas 2.2.1 behavior): + # >>> df = pd.DataFrame({"X": ["B", "A", "A", "B", "B", "B"], "Y": [4, 1, 3, -2, -1, -1]}) + # + # 1. groupby(sort=True).value_counts(sort=True) + # Sort on non-grouping columns, then sort on frequencies, then sort on grouping columns. + # >>> df.groupby("X", sort=True).value_counts(sort=True) + # X Y + # A 1 1 + # 3 1 + # B -1 2 + # -2 1 + # 4 1 + # Name: count, dtype: int64 + # + # 2. groupby(sort=True).value_counts(sort=False) + # Sort on non-grouping columns, then sort on grouping columns. + # >>> df.groupby("X", sort=True).value_counts(sort=True) + # X Y + # X Y + # A 1 1 + # 3 1 + # B -2 1 + # -1 2 + # 4 1 + # Name: count, dtype: int64 + # + # 3. groupby(sort=False).value_counts(sort=True) + # Sort on frequencies. + # >>> df.groupby("X", sort=False).value_counts(sort=True) + # X Y + # B -1 2 + # 4 1 + # A 1 1 + # 3 1 + # B -2 1 + # Name: count, dtype: int64 + # + # 4. 
groupby(sort=False).value_counts(sort=False) + # Sort on nothing (entries match the order of the original frame). + # X Y + # B 4 1 + # A 1 1 + # 3 1 + # B -2 1 + # -1 2 + # Name: count, dtype: int64 + # + # Lastly, when `normalize` is set with groupby(sort=False).value_counts(sort=True, normalize=True), + # pandas will sort by the pre-normalization counts rather than the resulting proportions. As this + # is an uncommon edge case, we cannot handle this using existing QC methods efficiently, so we just + # update our testing code to account for this. + # See comment on issue: https://github.com/pandas-dev/pandas/issues/59307#issuecomment-2313767856 + sort_cols = [] + if groupby_sort: + # When groupby(sort=True), sort the result on the grouping columns + sort_cols = by + ascending_cols = [True] * len(sort_cols) + if sort: + # When sort=True, also sort on the count/proportion column (always the last) + sort_cols.append( + result._modin_frame.data_column_pandas_labels[-1], + ) + ascending_cols.append(ascending) + if groupby_sort: + # When groupby_sort=True, also sort by the non-grouping columns before sorting by + # the count/proportion column. The left-most column (nearest to the grouping columns + # is sorted on last). + # Exclude the grouping columns (always the first) from the sort. + if as_index: + # When as_index is true, the non-grouping columns are part of the index columns + columns_to_filter = result._modin_frame.index_column_pandas_labels + else: + # When as_index is false, the non-grouping columns are part of the data columns + columns_to_filter = result._modin_frame.data_column_pandas_labels + non_grouping_cols = [ + col_label for col_label in columns_to_filter if col_label not in by + ] + sort_cols.extend(non_grouping_cols) + ascending_cols.extend([True] * len(non_grouping_cols)) + return result.sort_rows_by_column_values( + columns=sort_cols, + ascending=ascending_cols, + kind="stable", + na_position="last", + ignore_index=not as_index, # When as_index=False, take the default positional index + ) + def _get_dummies_helper( self, column: Hashable, @@ -5442,11 +5701,14 @@ def agg( ) for agg_arg in agg_args } + pandas_labels = list(agg_col_map.keys()) + if self.is_multiindex(axis=1): + pandas_labels = [ + (label,) * len(self.columns.names) for label in pandas_labels + ] single_agg_func_query_compilers.append( SnowflakeQueryCompiler( - frame.project_columns( - list(agg_col_map.keys()), list(agg_col_map.values()) - ) + frame.project_columns(pandas_labels, list(agg_col_map.values())) ) ) else: # axis == 0 @@ -10859,6 +11121,12 @@ def is_multiindex(self, *, axis: int = 0) -> bool: """ return self._modin_frame.is_multiindex(axis=axis) + def abs(self) -> "SnowflakeQueryCompiler": + return self.unary_op("abs") + + def negative(self) -> "SnowflakeQueryCompiler": + return self.unary_op("__neg__") + def unary_op(self, op: str) -> "SnowflakeQueryCompiler": """ Applies a unary operation `op` on each element of the `SnowflakeQueryCompiler`. @@ -11502,11 +11770,13 @@ def value_counts( def _value_counts_groupby( self, - by: Union[List[Hashable], Tuple[Hashable, ...]], + by: Sequence[Hashable], normalize: bool, sort: bool, ascending: bool, dropna: bool, + *, + normalize_within_groups: Optional[list[str]] = None, ) -> "SnowflakeQueryCompiler": """ Helper method to obtain the frequency or number of unique values @@ -11528,6 +11798,10 @@ def _value_counts_groupby( Sort in ascending order. dropna : bool Don't include counts of NaN. 
+ normalize_within_groups : list[str], optional + If set, the normalize parameter will normalize based on the specified groups + rather than the entire dataset. This parameter is exclusive to the Snowpark pandas + query compiler and is only used internally to implement groupby_value_counts. """ self._raise_not_implemented_error_for_timedelta() @@ -11557,9 +11831,21 @@ def _value_counts_groupby( # they are normalized to percentages as [2/(2+1+1), 1/(2+1+1), 1/(2+1+1)] = [0.5, 0.25, 0.25] # by default, ratio_to_report returns a decimal column, whereas pandas returns a float column if normalize: + if normalize_within_groups: + # If normalize_within_groups is set, then the denominator for ratio_to_report should + # be the size of each group instead. + normalize_snowflake_quoted_identifiers = [ + entry[0] + for entry in internal_frame.get_snowflake_quoted_identifiers_group_by_pandas_labels( + normalize_within_groups + ) + ] + window = Window.partition_by(normalize_snowflake_quoted_identifiers) + else: + window = None internal_frame = query_compiler._modin_frame.project_columns( [COUNT_LABEL], - builtin("ratio_to_report")(col(count_identifier)).over(), + builtin("ratio_to_report")(col(count_identifier)).over(window), ) count_identifier = internal_frame.data_column_snowflake_quoted_identifiers[ 0 @@ -13858,7 +14144,6 @@ def create_lazy_type_functions( assert len(right_result_data_identifiers) == 1, "other must be a Series" right = right_result_data_identifiers[0] right_datatype = right_datatypes[0] - # now replace in result frame identifiers with binary op result replace_mapping = {} snowpark_pandas_types = [] @@ -13880,10 +14165,19 @@ def create_lazy_type_functions( identifiers_to_keep = set( new_frame.index_column_snowflake_quoted_identifiers ) | set(update_result.old_id_to_new_id_mappings.values()) + self_is_column_mi = len(self._modin_frame.data_column_pandas_index_names) label_to_snowflake_quoted_identifier = [] snowflake_quoted_identifier_to_snowpark_pandas_type = {} for pair in new_frame.label_to_snowflake_quoted_identifier: if pair.snowflake_quoted_identifier in identifiers_to_keep: + if ( + self_is_column_mi + and isinstance(pair.label, tuple) + and isinstance(pair.label[0], tuple) + ): + pair = LabelIdentifierPair( + pair.label[0], pair.snowflake_quoted_identifier + ) label_to_snowflake_quoted_identifier.append(pair) snowflake_quoted_identifier_to_snowpark_pandas_type[ pair.snowflake_quoted_identifier @@ -13897,7 +14191,7 @@ def create_lazy_type_functions( label_to_snowflake_quoted_identifier ), num_index_columns=new_frame.num_index_columns, - data_column_index_names=new_frame.data_column_index_names, + data_column_index_names=self._modin_frame.data_column_index_names, snowflake_quoted_identifier_to_snowpark_pandas_type=snowflake_quoted_identifier_to_snowpark_pandas_type, ) @@ -14308,9 +14602,7 @@ def infer_sorted_column_labels( new_frame = InternalFrame.create( ordered_dataframe=expanded_ordered_frame, data_column_pandas_labels=sorted_column_labels, - data_column_pandas_index_names=[ - None - ], # operation removes column index name always. 
+ data_column_pandas_index_names=self._modin_frame.data_column_pandas_index_names, data_column_snowflake_quoted_identifiers=frame.data_column_snowflake_quoted_identifiers + new_identifiers, index_column_pandas_labels=index_column_pandas_labels, @@ -14357,7 +14649,7 @@ def infer_sorted_column_labels( new_frame = InternalFrame.create( ordered_dataframe=expanded_ordered_frame, data_column_pandas_labels=expanded_data_column_pandas_labels, - data_column_pandas_index_names=[None], # operation removes names + data_column_pandas_index_names=self._modin_frame.data_column_pandas_index_names, data_column_snowflake_quoted_identifiers=expanded_data_column_snowflake_quoted_identifiers, index_column_pandas_labels=index_column_pandas_labels, index_column_snowflake_quoted_identifiers=frame.index_column_snowflake_quoted_identifiers, @@ -17498,3 +17790,45 @@ def tz_convert(self, *args: Any, **kwargs: Any) -> None: def tz_localize(self, *args: Any, **kwargs: Any) -> None: ErrorMessage.method_not_implemented_error("tz_convert", "BasePandasDataset") + + def timedelta_property( + self, property_name: str, include_index: bool = False + ) -> "SnowflakeQueryCompiler": + """ + Extract a specified component of from Timedelta. + + Parameters + ---------- + property : {'days', 'seconds', 'microseconds', 'nanoseconds'} + The component to extract. + include_index: Whether to include the index columns in the operation. + + Returns + ------- + A new SnowflakeQueryCompiler with the extracted component. + """ + if not include_index: + assert ( + len(self.columns) == 1 + ), "dt only works for series" # pragma: no cover + + # mapping from the property name to the corresponding snowpark function + property_to_func_map = { + "days": lambda column: trunc(column / NANOSECONDS_PER_DAY), + "seconds": lambda column: trunc(column / NANOSECONDS_PER_SECOND) + % SECONDS_PER_DAY, + "microseconds": lambda column: trunc(column / NANOSECONDS_PER_MICROSECOND) + % MICROSECONDS_PER_SECOND, + "nanoseconds": lambda column: column % NANOSECONDS_PER_MICROSECOND, + } + func = property_to_func_map.get(property_name) + if not func: + class_prefix = ( + "TimedeltaIndex" if include_index else "Series.dt" + ) # pragma: no cover + raise ErrorMessage.not_implemented( + f"Snowpark pandas doesn't yet support the property '{class_prefix}.{property_name}'" + ) # pragma: no cover + return SnowflakeQueryCompiler( + self._modin_frame.apply_snowpark_function_to_columns(func, include_index) + ) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/base.py b/src/snowflake/snowpark/modin/plugin/docstrings/base.py index a6a0aff1af4..3ba4f2f2dab 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/base.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/base.py @@ -386,6 +386,25 @@ Series([], dtype: bool) """ +_get_set_index_doc = """ +{desc} + +{parameters_or_returns} + +Note +---- +When setting `DataFrame.index` or `Series.index` where the length of the +`Series`/`DataFrame` object does not match with the new index's length, +pandas raises a ValueError. Snowpark pandas does not raise this error; +this operation is valid. +When the `Series`/`DataFrame` object is longer than the new index, +the `Series`/`DataFrame`'s new index is filled with `NaN` values for +the "extra" elements. When the `Series`/`DataFrame` object is shorter than +the new index, the extra values in the new index are ignored—`Series` and +`DataFrame` stay the same length `n`, and use only the first `n` values of +the new index. 
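A minimal sketch of the documented length-mismatch behavior (illustrative only, not part of the patch; assumes an active Snowpark session):

    import modin.pandas as pd
    import snowflake.snowpark.modin.plugin  # noqa: F401

    s = pd.Series([10, 20, 30])

    # New index shorter than the Series: the extra element's index label becomes NaN.
    s.index = pd.Index(["a", "b"])

    # New index longer than the Series: only the first len(s) labels are used.
    s.index = pd.Index(["a", "b", "c", "d"])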
+""" + class BasePandasDataset: """ @@ -3594,3 +3613,21 @@ def __array_function__(): BasePandasDataset The result of the ufunc applied to the `BasePandasDataset`. """ + + @doc( + _get_set_index_doc, + desc="Get the index for this `Series`/`DataFrame`.", + parameters_or_returns="Returns\n-------\nIndex\n The index for this `Series`/`DataFrame`.", + ) + def _get_index(): + pass + + @doc( + _get_set_index_doc, + desc="Set the index for this `Series`/`DataFrame`.", + parameters_or_returns="Parameters\n----------\nnew_index : Index\n The new index to set.", + ) + def _set_index(): + pass + + index = property(_get_index, _set_index) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py index a42ef48eb94..f0c02aa0e65 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/dataframe.py @@ -13,6 +13,8 @@ _shared_docs, ) +from .base import BasePandasDataset + _doc_binary_op_kwargs = {"returns": "BasePandasDataset", "left": "BasePandasDataset"} @@ -49,7 +51,7 @@ } -class DataFrame: +class DataFrame(BasePandasDataset): """ Snowpark pandas representation of ``pandas.DataFrame`` with a lazily-evaluated relational dataset. @@ -3832,6 +3834,18 @@ def set_index(): DataFrame or None Changed row labels or None if ``inplace=True``. + Note + ---- + When performing ``DataFrame.set_index`` where the length of the + :class:`DataFrame` object does not match with the new index's length, + a ``ValueError`` is not raised. When the :class:`DataFrame` object is + longer than the new index, the :class:`DataFrame`'s new index is filled + with ``NaN`` values for the "extra" elements. When the :class:`DataFrame` + object is shorter than the new index, the extra values in the new index + are ignored—the :class:`DataFrame` stays the same length ``n``, + and uses only the first ``n`` values of the new index. + + See Also -------- DataFrame.reset_index : Opposite of set_index. diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/groupby.py b/src/snowflake/snowpark/modin/plugin/docstrings/groupby.py index 05d29f64850..0692647b3f7 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/groupby.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/groupby.py @@ -203,7 +203,108 @@ def sem(): pass def value_counts(): - pass + """ + Return a Series or DataFrame containing counts of unique rows. + + Parameters + ---------- + subset : list-like, optional + Columns to use when counting unique combinations. + + normalize : bool, default False + Return proportions rather than frequencies. + + Note that when `normalize=True`, `groupby` is called with `sort=False`, and `value_counts` + is called with `sort=True`, Snowpark pandas will order results differently from + native pandas. This occurs because native pandas sorts on frequencies before converting + them to proportions, while Snowpark pandas computes proportions within groups before sorting. + + See issue for details: https://github.com/pandas-dev/pandas/issues/59307 + + sort : bool, default True + Sort by frequencies. + + ascending : bool, default False + Sort in ascending order. + + dropna : bool, default True + Don't include counts of rows that contain NA values. + + Returns + ------- + :class:`~snowflake.snowpark.modin.pandas.Series` or :class:`~snowflake.snowpark.modin.pandas.DataFrame` + Series if the groupby as_index is True, otherwise DataFrame. 
+ + Notes + ----- + - If the groupby as_index is True then the returned Series will have a MultiIndex with one level per input column. + - If the groupby as_index is False then the returned DataFrame will have an additional column with the value_counts. + The column is labelled 'count' or 'proportion', depending on the normalize parameter. + + By default, rows that contain any NA values are omitted from the result. + + By default, the result will be in descending order so that the first element of each group is the most frequently-occurring row. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 'gender': ['male', 'male', 'female', 'male', 'female', 'male'], + ... 'education': ['low', 'medium', 'high', 'low', 'high', 'low'], + ... 'country': ['US', 'FR', 'US', 'FR', 'FR', 'FR'] + ... }) + + >>> df # doctest: +NORMALIZE_WHITESPACE + gender education country + 0 male low US + 1 male medium FR + 2 female high US + 3 male low FR + 4 female high FR + 5 male low FR + + >>> df.groupby('gender').value_counts() # doctest: +NORMALIZE_WHITESPACE + gender education country + female high FR 1 + US 1 + male low FR 2 + US 1 + medium FR 1 + Name: count, dtype: int64 + + >>> df.groupby('gender').value_counts(ascending=True) # doctest: +NORMALIZE_WHITESPACE + gender education country + female high FR 1 + US 1 + male low US 1 + medium FR 1 + low FR 2 + Name: count, dtype: int64 + + >>> df.groupby('gender').value_counts(normalize=True) # doctest: +NORMALIZE_WHITESPACE + gender education country + female high FR 0.50 + US 0.50 + male low FR 0.50 + US 0.25 + medium FR 0.25 + Name: proportion, dtype: float64 + + >>> df.groupby('gender', as_index=False).value_counts() # doctest: +NORMALIZE_WHITESPACE + gender education country count + 0 female high FR 1 + 1 female high US 1 + 2 male low FR 2 + 3 male low US 1 + 4 male medium FR 1 + + >>> df.groupby('gender', as_index=False).value_counts(normalize=True) # doctest: +NORMALIZE_WHITESPACE + gender education country proportion + 0 female high FR 0.50 + 1 female high US 0.50 + 2 male low FR 0.50 + 3 male low US 0.25 + 4 male medium FR 0.25 + """ def mean(): """ @@ -2103,8 +2204,45 @@ def size(): """ pass - def unique(self): + def unique(): pass def apply(): pass + + def value_counts(): + """ + Return a Series or DataFrame containing counts of unique rows. + + Parameters + ---------- + subset : list-like, optional + Columns to use when counting unique combinations. + + normalize : bool, default False + Return proportions rather than frequencies. + + Note that when `normalize=True`, `groupby` is called with `sort=False`, and `value_counts` + is called with `sort=True`, Snowpark pandas will order results differently from + native pandas. This occurs because native pandas sorts on frequencies before converting + them to proportions, while Snowpark pandas computes proportions within groups before sorting. + + See issue for details: https://github.com/pandas-dev/pandas/issues/59307 + + sort : bool, default True + Sort by frequencies. + + ascending : bool, default False + Sort in ascending order. + + bins : int, optional + Rather than count values, group them into half-open bins, a convenience for `pd.cut`, only works with numeric data. + This parameter is not yet supported in Snowpark pandas. + + dropna : bool, default True + Don't include counts of rows that contain NA values. 
+ + Returns + ------- + :class:`~snowflake.snowpark.modin.pandas.Series` + """ diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/resample.py b/src/snowflake/snowpark/modin/plugin/docstrings/resample.py index b152fb9ed45..a1414b1ce18 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/resample.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/resample.py @@ -200,7 +200,7 @@ def ffill(): 2020-01-06 3 Freq: None, dtype: int64 - >>> lst2 = pd.to_datetime(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10']) + >>> lst2 = pd.to_datetime(pd.Index(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10'])) >>> ser2 = pd.Series([1, 2, 3, 4, None, 6], index=lst2) >>> ser2 2023-01-03 01:00:00 1.0 @@ -257,7 +257,7 @@ def ffill(): 2020-01-03 0 15 2020-01-06 2 17 - >>> index2 = pd.to_datetime(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10']) + >>> index2 = pd.to_datetime(pd.Index(['2023-01-03 1:00:00', '2023-01-04', '2023-01-05 23:00:00', '2023-01-06', '2023-01-07 2:00:00', '2023-01-10'])) >>> df2 = pd.DataFrame({'a': range(len(index2)), ... 'b': range(len(index2) + 10, len(index2) * 2 + 10)}, ... index=index2) diff --git a/src/snowflake/snowpark/modin/plugin/docstrings/series.py b/src/snowflake/snowpark/modin/plugin/docstrings/series.py index 6e48a7e57f3..a8ab6a60c77 100644 --- a/src/snowflake/snowpark/modin/plugin/docstrings/series.py +++ b/src/snowflake/snowpark/modin/plugin/docstrings/series.py @@ -15,6 +15,8 @@ ) from snowflake.snowpark.modin.utils import _create_operator_docstring +from .base import BasePandasDataset + _shared_doc_kwargs = { "axes": "index", "klass": "Series", @@ -35,7 +37,7 @@ } -class Series: +class Series(BasePandasDataset): """ Snowpark pandas representation of `pandas.Series` with a lazily-evaluated relational dataset. @@ -3659,6 +3661,48 @@ def hasnans(): Return True if there are any NaNs. """ + @property + def is_monotonic_decreasing(): + """ + Return boolean if values in the object are monotonically decreasing. + + Returns + ------- + bool + Whether or not the Series is monotonically decreasing. + + Examples + -------- + >>> s = pd.Series([3, 2, 2, 1]) + >>> s.is_monotonic_decreasing + True + + >>> s = pd.Series([1, 2, 3]) + >>> s.is_monotonic_decreasing + False + """ + + @property + def is_monotonic_increasing(): + """ + Return boolean if values in the object are monotonically increasing. + + Returns + ------- + bool + Whether or not the Series is monotonically increasing. + + Examples + -------- + >>> s = pd.Series([1, 2, 2]) + >>> s.is_monotonic_increasing + True + + >>> s = pd.Series([3, 2, 1]) + >>> s.is_monotonic_increasing + False + """ + def isna(): """ Detect missing values. @@ -3719,18 +3763,6 @@ def isnull(): dtype: bool """ - @property - def is_monotonic_increasing(): - """ - Return True if values in the Series are monotonic_increasing. - """ - - @property - def is_monotonic_decreasing(): - """ - Return True if values in the Series are monotonic_decreasing. - """ - @property def is_unique(): """ diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py b/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py new file mode 100644 index 00000000000..496136d736e --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_extensions.py @@ -0,0 +1,46 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +""" +File containing BasePandasDataset APIs defined in Snowpark pandas but not the Modin API layer. +""" + +from snowflake.snowpark.modin.plugin._internal.telemetry import ( + snowpark_pandas_telemetry_method_decorator, +) + +from .base_overrides import register_base_override + + +@register_base_override("__array_function__") +@snowpark_pandas_telemetry_method_decorator +def __array_function__(self, func: callable, types: tuple, args: tuple, kwargs: dict): + """ + Apply the `func` to the `BasePandasDataset`. + + Parameters + ---------- + func : np.func + The NumPy func to apply. + types : tuple + The types of the args. + args : tuple + The args to the func. + kwargs : dict + Additional keyword arguments. + + Returns + ------- + BasePandasDataset + The result of the ufunc applied to the `BasePandasDataset`. + """ + from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import ( + numpy_to_pandas_func_map, + ) + + if func.__name__ in numpy_to_pandas_func_map: + return numpy_to_pandas_func_map[func.__name__](*args, **kwargs) + else: + # per NEP18 we raise NotImplementedError so that numpy can intercept + return NotImplemented # pragma: no cover diff --git a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py index 332df757787..abbcb9bc762 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/base_overrides.py @@ -6,31 +6,123 @@ Methods defined on BasePandasDataset that are overridden in Snowpark pandas. Adding a method to this file should be done with discretion, and only when relevant changes cannot be made to the query compiler or upstream frontend to accommodate Snowpark pandas. + +If you must override a method in this file, please add a comment describing why it must be overridden, +and if possible, whether this can be reconciled with upstream Modin. 
""" from __future__ import annotations import pickle as pkl -from typing import Any +import warnings +from collections.abc import Sequence +from typing import Any, Callable, Hashable, Literal, Mapping, get_args +import modin.pandas as pd import numpy as np +import numpy.typing as npt import pandas from modin.pandas.base import BasePandasDataset -from pandas._libs.lib import no_default +from pandas._libs import lib +from pandas._libs.lib import NoDefault, is_bool, no_default from pandas._typing import ( + AggFuncType, + AnyArrayLike, + Axes, Axis, CompressionOptions, + FillnaOptions, + IgnoreRaise, + IndexKeyFunc, + IndexLabel, + Level, + NaPosition, + RandomState, + Scalar, StorageOptions, TimedeltaConvertibleTypes, + TimestampConvertibleTypes, +) +from pandas.core.common import apply_if_callable +from pandas.core.dtypes.common import ( + is_dict_like, + is_dtype_equal, + is_list_like, + is_numeric_dtype, + pandas_dtype, +) +from pandas.core.dtypes.inference import is_integer +from pandas.core.methods.describe import _refine_percentiles +from pandas.errors import SpecificationError +from pandas.util._validators import ( + validate_ascending, + validate_bool_kwarg, + validate_percentile, ) +import snowflake.snowpark.modin.pandas as spd from snowflake.snowpark.modin.pandas.api.extensions import ( register_dataframe_accessor, register_series_accessor, ) +from snowflake.snowpark.modin.pandas.utils import ( + ensure_index, + extract_validate_and_try_convert_named_aggs_from_kwargs, + get_as_shape_compatible_dataframe_or_series, + is_scalar, + raise_if_native_pandas_objects, + validate_and_try_convert_agg_func_arg_func_to_str, +) from snowflake.snowpark.modin.plugin._internal.telemetry import ( snowpark_pandas_telemetry_method_decorator, + try_add_telemetry_to_attribute, +) +from snowflake.snowpark.modin.plugin._typing import ListLike +from snowflake.snowpark.modin.plugin.utils.error_message import ( + ErrorMessage, + base_not_implemented, ) -from snowflake.snowpark.modin.plugin.utils.error_message import base_not_implemented +from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage +from snowflake.snowpark.modin.utils import validate_int_kwarg + + +def register_base_override(method_name: str): + """ + Decorator function to override a method on BasePandasDataset. Since Modin does not provide a mechanism + for directly overriding methods on BasePandasDataset, we mock this by performing the override on + DataFrame and Series, and manually performing a `setattr` on the base class. These steps are necessary + to allow both the docstring extension and method dispatch to work properly. + + Methods annotated here also are automatically instrumented with Snowpark pandas telemetry. + """ + + def decorator(base_method: Any): + base_method = try_add_telemetry_to_attribute(method_name, base_method) + parent_method = getattr(BasePandasDataset, method_name, None) + if isinstance(parent_method, property): + parent_method = parent_method.fget + # If the method was not defined on Series/DataFrame and instead inherited from the superclass + # we need to override it as well because the MRO was already determined or something? 
+ # TODO: SNOW-1063347 + # Since we still use the vendored version of Series and the overrides for the top-level + # namespace haven't been performed yet, we need to set properties on the vendored version + series_method = getattr(spd.series.Series, method_name, None) + if isinstance(series_method, property): + series_method = series_method.fget + if series_method is None or series_method is parent_method: + register_series_accessor(method_name)(base_method) + # TODO: SNOW-1063346 + # Since we still use the vendored version of DataFrame and the overrides for the top-level + # namespace haven't been performed yet, we need to set properties on the vendored version + df_method = getattr(spd.dataframe.DataFrame, method_name, None) + if isinstance(df_method, property): + df_method = df_method.fget + if df_method is None or df_method is parent_method: + register_dataframe_accessor(method_name)(base_method) + # Replace base method + setattr(BasePandasDataset, method_name, base_method) + return base_method + + return decorator def register_base_not_implemented(): @@ -303,3 +395,1901 @@ def truncate( @register_base_not_implemented() def __finalize__(self, other, method=None, **kwargs): pass # pragma: no cover + + +# === OVERRIDDEN METHODS === +# The below methods have their frontend implementations overridden compared to the version present +# in base.py. This is usually for one of the following reasons: +# 1. The underlying QC interface used differs from that of modin. Notably, this applies to aggregate +# and binary operations; further work is needed to refactor either our implementation or upstream +# modin's implementation. +# 2. Modin performs extra validation queries that perform extra SQL queries. Some of these are already +# fixed on main; see https://github.com/modin-project/modin/issues/7340 for details. +# 3. Upstream Modin defaults to pandas for some edge cases. Defaulting to pandas at the query compiler +# layer is acceptable because we can force the method to raise NotImplementedError, but if a method +# defaults at the frontend, Modin raises a warning and performs the operation by coercing the +# dataset to a native pandas object. Removing these is tracked by +# https://github.com/modin-project/modin/issues/7104 +# 4. Snowpark pandas uses different default arguments from modin. This occurs if some parameters are +# only partially supported (like `numeric_only=True` for `skew`), but this behavior should likewise +# be revisited. + +# `aggregate` for axis=1 is performed as a call to `BasePandasDataset.apply` in upstream Modin, +# which is unacceptable for Snowpark pandas. Upstream Modin should be changed to allow the query +# compiler or a different layer to control dispatch. +@register_base_override("aggregate") +def aggregate( + self, func: AggFuncType = None, axis: Axis | None = 0, *args: Any, **kwargs: Any +): + """ + Aggregate using one or more operations over the specified axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas import Series + + origin_axis = axis + axis = self._get_axis_number(axis) + + if axis == 1 and isinstance(self, Series): + raise ValueError(f"No axis named {origin_axis} for object type Series") + + if len(self._query_compiler.columns) == 0: + # native pandas raise error with message "no result", here we raise a more readable error. 
+ raise ValueError("No column to aggregate on.") + + # If we are using named kwargs, then we do not clear the kwargs (need them in the QC for processing + # order, as well as formatting error messages.) + uses_named_kwargs = False + # If aggregate is called on a Series, named aggregations can be passed in via a dictionary + # to func. + if func is None or (is_dict_like(func) and not self._is_dataframe): + if axis == 1: + raise ValueError( + "`func` must not be `None` when `axis=1`. Named aggregations are not supported with `axis=1`." + ) + if func is not None: + # If named aggregations are passed in via a dictionary to func, then we + # ignore the kwargs. + if any(is_dict_like(value) for value in func.values()): + # We can only get to this codepath if self is a Series, and func is a dictionary. + # In this case, if any of the values of func are themselves dictionaries, we must raise + # a Specification Error, as that is what pandas does. + raise SpecificationError("nested renamer is not supported") + kwargs = func + func = extract_validate_and_try_convert_named_aggs_from_kwargs( + self, allow_duplication=False, axis=axis, **kwargs + ) + uses_named_kwargs = True + else: + func = validate_and_try_convert_agg_func_arg_func_to_str( + agg_func=func, + obj=self, + allow_duplication=False, + axis=axis, + ) + + # This is to stay consistent with pandas result format, when the func is single + # aggregation function in format of callable or str, reduce the result dimension to + # convert dataframe to series, or convert series to scalar. + # Note: When named aggregations are used, the result is not reduced, even if there + # is only a single function. + # needs_reduce_dimension cannot be True if we are using named aggregations, since + # the values for func in that case are either NamedTuples (AggFuncWithLabels) or + # lists of NamedTuples, both of which are list like. + need_reduce_dimension = ( + (callable(func) or isinstance(func, str)) + # A Series should be returned when a single scalar string/function aggregation function, or a + # dict of scalar string/functions is specified. In all other cases (including if the function + # is a 1-element list), the result is a DataFrame. + # + # The examples below have axis=1, but the same logic is applied for axis=0. + # >>> df = pd.DataFrame({"a": [0, 1], "b": [2, 3]}) + # + # single aggregation: return Series + # >>> df.agg("max", axis=1) + # 0 2 + # 1 3 + # dtype: int64 + # + # list of aggregations: return DF + # >>> df.agg(["max"], axis=1) + # max + # 0 2 + # 1 3 + # + # dict where all aggregations are strings: return Series + # >>> df.agg({1: "max", 0: "min"}, axis=1) + # 1 3 + # 0 0 + # dtype: int64 + # + # dict where one element is a list: return DF + # >>> df.agg({1: "max", 0: ["min"]}, axis=1) + # max min + # 1 3.0 NaN + # 0 NaN 0.0 + or ( + is_dict_like(func) + and all(not is_list_like(value) for value in func.values()) + ) + ) + + # If func is a dict, pandas will not respect kwargs for each aggregation function, and + # we should drop them before passing the to the query compiler. 
+ # + # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg("max", skipna=False, axis=1) + # 0 NaN + # 1 1.0 + # dtype: float64 + # >>> native_pd.DataFrame({"a": [0, 1], "b": [np.nan, 0]}).agg(["max"], skipna=False, axis=1) + # max + # 0 0.0 + # 1 1.0 + # >>> pd.DataFrame([[np.nan], [0]]).aggregate("count", skipna=True, axis=0) + # 0 1 + # dtype: int8 + # >>> pd.DataFrame([[np.nan], [0]]).count(skipna=True, axis=0) + # TypeError: got an unexpected keyword argument 'skipna' + if is_dict_like(func) and not uses_named_kwargs: + kwargs.clear() + + result = self.__constructor__( + query_compiler=self._query_compiler.agg( + func=func, + axis=axis, + args=args, + kwargs=kwargs, + ) + ) + + if need_reduce_dimension: + if self._is_dataframe: + result = Series(query_compiler=result._query_compiler) + + if isinstance(result, Series): + # When func is just "quantile" with a scalar q, result has quantile value as name + q = kwargs.get("q", 0.5) + if func == "quantile" and is_scalar(q): + result.name = q + else: + result.name = None + + # handle case for single scalar (same as result._reduce_dimension()) + if isinstance(self, Series): + return result.to_pandas().squeeze() + + return result + + +# `agg` is an alias of `aggregate`. +agg = aggregate +register_base_override("agg")(agg) + + +# `_agg_helper` is not defined in modin, and used by Snowpark pandas to do extra validation. +@register_base_override("_agg_helper") +def _agg_helper( + self, + func: str, + skipna: bool = True, + axis: int | None | NoDefault = no_default, + numeric_only: bool = False, + **kwargs: Any, +): + if not self._is_dataframe and numeric_only and not is_numeric_dtype(self.dtype): + # Series aggregations on non-numeric data do not support numeric_only: + # https://github.com/pandas-dev/pandas/blob/cece8c6579854f6b39b143e22c11cac56502c4fd/pandas/core/series.py#L6358 + raise TypeError( + f"Series.{func} does not allow numeric_only=True with non-numeric dtypes." + ) + axis = self._get_axis_number(axis) + numeric_only = validate_bool_kwarg(numeric_only, "numeric_only", none_allowed=True) + skipna = validate_bool_kwarg(skipna, "skipna", none_allowed=False) + agg_kwargs: dict[str, Any] = { + "numeric_only": numeric_only, + "skipna": skipna, + } + agg_kwargs.update(kwargs) + return self.aggregate(func=func, axis=axis, **agg_kwargs) + + +# See _agg_helper +@register_base_override("count") +def count( + self, + axis: Axis | None = 0, + numeric_only: bool = False, +): + """ + Count non-NA cells for `BasePandasDataset`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._agg_helper( + func="count", + axis=axis, + numeric_only=numeric_only, + ) + + +# See _agg_helper +@register_base_override("max") +def max( + self, + axis: Axis | None = 0, + skipna: bool = True, + numeric_only: bool = False, + **kwargs: Any, +): + """ + Return the maximum of the values over the requested axis. + """ + return self._agg_helper( + func="max", + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + **kwargs, + ) + + +# See _agg_helper +@register_base_override("min") +def min( + self, + axis: Axis | None | NoDefault = no_default, + skipna: bool = True, + numeric_only: bool = False, + **kwargs, +): + """ + Return the minimum of the values over the requested axis. 
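A short usage sketch of the extra Series validation performed in `_agg_helper` above (illustrative only; assumes an active Snowpark session):

    import modin.pandas as pd
    import snowflake.snowpark.modin.plugin  # noqa: F401

    s = pd.Series(["a", "b", "c"])
    try:
        # Non-numeric dtype, so numeric_only=True is rejected before reaching
        # the query compiler.
        s.max(numeric_only=True)
    except TypeError as exc:
        print(exc)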
+    """
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    return self._agg_helper(
+        func="min",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("mean")
+def mean(
+    self,
+    axis: Axis | None | NoDefault = no_default,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    """
+    Return the mean of the values over the requested axis.
+    """
+    return self._agg_helper(
+        func="mean",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("median")
+def median(
+    self,
+    axis: Axis | None | NoDefault = no_default,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    """
+    Return the median of the values over the requested axis.
+    """
+    return self._agg_helper(
+        func="median",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("std")
+def std(
+    self,
+    axis: Axis | None = None,
+    skipna: bool = True,
+    ddof: int = 1,
+    numeric_only: bool = False,
+    **kwargs,
+):
+    """
+    Return sample standard deviation over requested axis.
+    """
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    kwargs.update({"ddof": ddof})
+    return self._agg_helper(
+        func="std",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("sum")
+def sum(
+    self,
+    axis: Axis | None = None,
+    skipna: bool = True,
+    numeric_only: bool = False,
+    min_count: int = 0,
+    **kwargs: Any,
+):
+    # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset
+    min_count = validate_int_kwarg(min_count, "min_count")
+    kwargs.update({"min_count": min_count})
+    return self._agg_helper(
+        func="sum",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# See _agg_helper
+@register_base_override("var")
+def var(
+    self,
+    axis: Axis | None = None,
+    skipna: bool = True,
+    ddof: int = 1,
+    numeric_only: bool = False,
+    **kwargs: Any,
+):
+    """
+    Return unbiased variance over requested axis.
+    """
+    kwargs.update({"ddof": ddof})
+    return self._agg_helper(
+        func="var",
+        axis=axis,
+        skipna=skipna,
+        numeric_only=numeric_only,
+        **kwargs,
+    )
+
+
+# Modin does not provide `MultiIndex` support and will default to pandas when `level` is specified,
+# and allows binary ops against native pandas objects that Snowpark pandas prohibits.
+@register_base_override("_binary_op")
+def _binary_op(
+    self,
+    op: str,
+    other: BasePandasDataset,
+    axis: Axis = None,
+    level: Level | None = None,
+    fill_value: float | None = None,
+    **kwargs: Any,
+):
+    """
+    Do binary operation between two datasets.
+
+    Parameters
+    ----------
+    op : str
+        Name of binary operation.
+    other : modin.pandas.BasePandasDataset
+        Second operand of binary operation.
+    axis: Whether to compare by the index (0 or ‘index’) or columns (1 or ‘columns’).
+    level: Broadcast across a level, matching Index values on the passed MultiIndex level.
+    fill_value: Fill existing missing (NaN) values, and any new element needed for
+        successful DataFrame alignment, with this value before computation.
+        If data in both corresponding DataFrame locations is missing the result will be missing.
+        Only arithmetic binary operations have this parameter (e.g., add() has it, but eq() doesn't).
+ + kwargs can contain the following parameters passed in at the frontend: + func: Only used for `combine` method. Function that takes two series as inputs and + return a Series or a scalar. Used to merge the two dataframes column by columns. + + Returns + ------- + modin.pandas.BasePandasDataset + Result of binary operation. + """ + # In upstream modin, _axis indicates the operator will use the default axis + if kwargs.pop("_axis", None) is None: + if axis is not None: + axis = self._get_axis_number(axis) + else: + axis = 1 + else: + axis = 0 + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + raise_if_native_pandas_objects(other) + axis = self._get_axis_number(axis) + squeeze_self = isinstance(self, pd.Series) + + # pandas itself will ignore the axis argument when using Series.. + # Per default, it is set to axis=0. However, for the case of a Series interacting with + # a DataFrame the behavior is axis=1. Manually check here for this case and adjust the axis. + + is_lhs_series_and_rhs_dataframe = ( + True + if isinstance(self, pd.Series) and isinstance(other, pd.DataFrame) + else False + ) + + new_query_compiler = self._query_compiler.binary_op( + op=op, + other=other, + axis=1 if is_lhs_series_and_rhs_dataframe else axis, + level=level, + fill_value=fill_value, + squeeze_self=squeeze_self, + **kwargs, + ) + + from snowflake.snowpark.modin.pandas.dataframe import DataFrame + + # Modin Bug: https://github.com/modin-project/modin/issues/7236 + # For a Series interacting with a DataFrame, always return a DataFrame + return ( + DataFrame(query_compiler=new_query_compiler) + if is_lhs_series_and_rhs_dataframe + else self._create_or_update_from_compiler(new_query_compiler) + ) + + +# Current Modin does not use _dropna and instead defines `dropna` directly, but Snowpark pandas +# Series/DF still do. Snowpark pandas still needs to add support for the `ignore_index` parameter +# (added in pandas 2.0), and should be able to refactor to remove this override. +@register_base_override("_dropna") +def _dropna( + self, + axis: Axis = 0, + how: str | NoDefault = no_default, + thresh: int | NoDefault = no_default, + subset: IndexLabel = None, + inplace: bool = False, +): + inplace = validate_bool_kwarg(inplace, "inplace") + + if is_list_like(axis): + raise TypeError("supplying multiple axes to axis is no longer supported.") + + axis = self._get_axis_number(axis) + + if (how is not no_default) and (thresh is not no_default): + raise TypeError( + "You cannot set both the how and thresh arguments at the same time." + ) + + if how is no_default: + how = "any" + if how not in ["any", "all"]: + raise ValueError("invalid how option: %s" % how) + if subset is not None: + if axis == 1: + indices = self.index.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + else: + indices = self.columns.get_indexer_for(subset) + check = indices == -1 + if check.any(): + raise KeyError(list(np.compress(check, subset))) + + new_query_compiler = self._query_compiler.dropna( + axis=axis, + how=how, + thresh=thresh, + subset=subset, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + +# Snowpark pandas uses `self_is_series` instead of `squeeze_self` and `squeeze_value` to determine +# the shape of `self` and `value`. Further work is needed to reconcile these two approaches. 
+@register_base_override("fillna") +def fillna( + self, + self_is_series, + value: Hashable | Mapping | pd.Series | pd.DataFrame = None, + method: FillnaOptions | None = None, + axis: Axis | None = None, + inplace: bool = False, + limit: int | None = None, + downcast: dict | None = None, +): + """ + Fill NA/NaN values using the specified method. + + Parameters + ---------- + self_is_series : bool + If True then self contains a Series object, if False then self contains + a DataFrame object. + value : scalar, dict, Series, or DataFrame, default: None + Value to use to fill holes (e.g. 0), alternately a + dict/Series/DataFrame of values specifying which value to use for + each index (for a Series) or column (for a DataFrame). Values not + in the dict/Series/DataFrame will not be filled. This value cannot + be a list. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default: None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use next valid observation to fill gap. + axis : {None, 0, 1}, default: None + Axis along which to fill missing values. + inplace : bool, default: False + If True, fill in-place. Note: this will modify any + other views on this object (e.g., a no-copy slice for a column in a + DataFrame). + limit : int, default: None + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. Must be greater than 0 if not None. + downcast : dict, default: None + A dict of item->dtype of what to downcast if possible, + or the string 'infer' which will try to downcast to an appropriate + equal type (e.g. float64 to int64 if possible). + + Returns + ------- + Series, DataFrame or None + Object with missing values filled or None if ``inplace=True``. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + raise_if_native_pandas_objects(value) + inplace = validate_bool_kwarg(inplace, "inplace") + axis = self._get_axis_number(axis) + if isinstance(value, (list, tuple)): + raise TypeError( + '"value" parameter must be a scalar or dict, but ' + + f'you passed a "{type(value).__name__}"' + ) + if value is None and method is None: + # same as pandas + raise ValueError("Must specify a fill 'value' or 'method'.") + if value is not None and method is not None: + raise ValueError("Cannot specify both 'value' and 'method'.") + if method is not None and method not in ["backfill", "bfill", "pad", "ffill"]: + expecting = "pad (ffill) or backfill (bfill)" + msg = "Invalid fill method. Expecting {expecting}. Got {method}".format( + expecting=expecting, method=method + ) + raise ValueError(msg) + if limit is not None: + if not isinstance(limit, int): + raise ValueError("Limit must be an integer") + elif limit <= 0: + raise ValueError("Limit must be greater than 0") + + new_query_compiler = self._query_compiler.fillna( + self_is_series=self_is_series, + value=value, + method=method, + axis=axis, + limit=limit, + downcast=downcast, + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + +# Snowpark pandas passes the query compiler object from a BasePandasDataset, which Modin does not do. 
+@register_base_override("isin") +def isin( + self, values: BasePandasDataset | ListLike | dict[Hashable, ListLike] +) -> BasePandasDataset: # noqa: PR01, RT01, D200 + """ + Whether elements in `BasePandasDataset` are contained in `values`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + + # Pass as query compiler if values is BasePandasDataset. + if isinstance(values, BasePandasDataset): + values = values._query_compiler + + # Convert non-dict values to List if values is neither List[Any] nor np.ndarray. SnowflakeQueryCompiler + # expects for the non-lazy case, where values is not a BasePandasDataset, the data to be materialized + # as list or numpy array. Because numpy may perform implicit type conversions, use here list to be more general. + elif not isinstance(values, dict) and ( + not isinstance(values, list) or not isinstance(values, np.ndarray) + ): + values = list(values) + + return self.__constructor__(query_compiler=self._query_compiler.isin(values=values)) + + +# Snowpark pandas uses the single `quantiles_along_axis0` query compiler method, while upstream +# Modin splits this into `quantile_for_single_value` and `quantile_for_list_of_values` calls. +# It should be possible to merge those two functions upstream and reconcile the implementations. +@register_base_override("quantile") +def quantile( + self, + q: Scalar | ListLike = 0.5, + axis: Axis = 0, + numeric_only: bool = False, + interpolation: Literal[ + "linear", "lower", "higher", "midpoint", "nearest" + ] = "linear", + method: Literal["single", "table"] = "single", +) -> float | BasePandasDataset: + """ + Return values at the given quantile over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + + # TODO + # - SNOW-1008361: support axis=1 + # - SNOW-1008367: support when q is Snowpandas DF/Series (need to require QC interface to accept QC q values) + # - SNOW-1003587: support datetime/timedelta columns + + if axis == 1 or interpolation not in ["linear", "nearest"] or method != "single": + ErrorMessage.not_implemented( + f"quantile function with parameters axis={axis}, interpolation={interpolation}, method={method} not supported" + ) + + if not numeric_only: + # If not numeric_only and columns, then check all columns are either + # numeric, timestamp, or timedelta + # Check if dtype is numeric, timedelta ("m"), or datetime ("M") + if not axis and not all( + is_numeric_dtype(t) or lib.is_np_dtype(t, "mM") for t in self._get_dtypes() + ): + raise TypeError("can't multiply sequence by non-int of type 'float'") + # If over rows, then make sure that all dtypes are equal for not + # numeric_only + elif axis: + for i in range(1, len(self._get_dtypes())): + pre_dtype = self._get_dtypes()[i - 1] + curr_dtype = self._get_dtypes()[i] + if not is_dtype_equal(pre_dtype, curr_dtype): + raise TypeError( + "Cannot compare type '{}' with type '{}'".format( + pre_dtype, curr_dtype + ) + ) + else: + # Normally pandas returns this near the end of the quantile, but we + # can't afford the overhead of running the entire operation before + # we error. 
+ if not any(is_numeric_dtype(t) for t in self._get_dtypes()): + raise ValueError("need at least one array to concatenate") + + # check that all qs are between 0 and 1 + validate_percentile(q) + axis = self._get_axis_number(axis) + query_compiler = self._query_compiler.quantiles_along_axis0( + q=q if is_list_like(q) else [q], + numeric_only=numeric_only, + interpolation=interpolation, + method=method, + ) + if is_list_like(q): + return self.__constructor__(query_compiler=query_compiler) + else: + # result is either a scalar or Series + result = self._reduce_dimension(query_compiler.transpose_single_row()) + if isinstance(result, BasePandasDataset): + result.name = q + return result + + +# Current Modin does not define this method. Snowpark pandas currently only uses it in +# `DataFrame.set_index`. Modin does not support MultiIndex, or have its own lazy index class, +# so we may need to keep this method for the foreseeable future. +@register_base_override("_to_series_list") +def _to_series_list(self, index: pd.Index) -> list[pd.Series]: + """ + Convert index to a list of series + Args: + index: can be single or multi index + + Returns: + the list of series + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if isinstance(index, pd.MultiIndex): + return [ + pd.Series(index.get_level_values(level)) for level in range(index.nlevels) + ] + elif isinstance(index, pd.Index): + return [pd.Series(index)] + else: + raise Exception("invalid index: " + str(index)) + + +# Upstream modin defaults to pandas when `suffix` is provided. +@register_base_override("shift") +def shift( + self, + periods: int | Sequence[int] = 1, + freq=None, + axis: Axis = 0, + fill_value: Hashable = no_default, + suffix: str | None = None, +) -> BasePandasDataset: + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if periods == 0 and freq is None: + # Check obvious case first, freq manipulates the index even for periods == 0 so check for it in addition. + return self.copy() + + # pandas compatible ValueError for freq='infer' + # TODO: Test as part of SNOW-1023324. + if freq == "infer": # pragma: no cover + if not hasattr(self, "freq") and not hasattr( # pragma: no cover + self, "inferred_freq" # pragma: no cover + ): # pragma: no cover + raise ValueError() # pragma: no cover + + axis = self._get_axis_number(axis) + + if fill_value == no_default: + fill_value = None + + new_query_compiler = self._query_compiler.shift( + periods, freq, axis, fill_value, suffix + ) + return self._create_or_update_from_compiler(new_query_compiler, False) + + +# Snowpark pandas supports only `numeric_only=True`, which is not the default value of the argument, +# so we have this overridden. We should revisit this behavior. +@register_base_override("skew") +def skew( + self, + axis: Axis | None | NoDefault = no_default, + skipna: bool = True, + numeric_only=True, + **kwargs, +): # noqa: PR01, RT01, D200 + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + """ + Return unbiased skew over requested axis. 
+ """ + return self._stat_operation("skew", axis, skipna, numeric_only, **kwargs) + + +@register_base_override("resample") +def resample( + self, + rule, + axis: Axis = lib.no_default, + closed: str | None = None, + label: str | None = None, + convention: str = "start", + kind: str | None = None, + on: Level = None, + level: Level = None, + origin: str | TimestampConvertibleTypes = "start_day", + offset: TimedeltaConvertibleTypes | None = None, + group_keys=no_default, +): # noqa: PR01, RT01, D200 + """ + Resample time-series data. + """ + from snowflake.snowpark.modin.pandas.resample import Resampler + + if axis is not lib.no_default: # pragma: no cover + axis = self._get_axis_number(axis) + if axis == 1: + warnings.warn( + "DataFrame.resample with axis=1 is deprecated. Do " + + "`frame.T.resample(...)` without axis instead.", + FutureWarning, + stacklevel=1, + ) + else: + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.resample is " + + "deprecated and will be removed in a future version.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + return Resampler( + dataframe=self, + rule=rule, + axis=axis, + closed=closed, + label=label, + convention=convention, + kind=kind, + on=on, + level=level, + origin=origin, + offset=offset, + group_keys=group_keys, + ) + + +# Snowpark pandas needs to return a custom Expanding window object. We cannot use the +# extensions module for this at the moment because modin performs a relative import of +# `from .window import Expanding`. +@register_base_override("expanding") +def expanding(self, min_periods=1, axis=0, method="single"): # noqa: PR01, RT01, D200 + """ + Provide expanding window calculations. + """ + from snowflake.snowpark.modin.pandas.window import Expanding + + if axis is not lib.no_default: + axis = self._get_axis_number(axis) + name = "expanding" + if axis == 1: + warnings.warn( + f"Support for axis=1 in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + f"Use obj.T.{name}(...) instead", + FutureWarning, + stacklevel=1, + ) + else: + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + "Call the method without the axis keyword instead.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + return Expanding( + self, + min_periods=min_periods, + axis=axis, + method=method, + ) + + +# Same as Expanding: Snowpark pandas needs to return a custmo Window object. +@register_base_override("rolling") +def rolling( + self, + window, + min_periods: int | None = None, + center: bool = False, + win_type: str | None = None, + on: str | None = None, + axis: Axis = lib.no_default, + closed: str | None = None, + step: int | None = None, + method: str = "single", +): # noqa: PR01, RT01, D200 + """ + Provide rolling window calculations. + """ + if axis is not lib.no_default: + axis = self._get_axis_number(axis) + name = "rolling" + if axis == 1: + warnings.warn( + f"Support for axis=1 in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. " + + f"Use obj.T.{name}(...) instead", + FutureWarning, + stacklevel=1, + ) + else: # pragma: no cover + warnings.warn( + f"The 'axis' keyword in {type(self).__name__}.{name} is " + + "deprecated and will be removed in a future version. 
" + + "Call the method without the axis keyword instead.", + FutureWarning, + stacklevel=1, + ) + else: + axis = 0 + + if win_type is not None: + from snowflake.snowpark.modin.pandas.window import Window + + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + from snowflake.snowpark.modin.pandas.window import Rolling + + return Rolling( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + step=step, + method=method, + ) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("iloc") +@property +def iloc(self): + """ + Purely integer-location based indexing for selection by position. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: SNOW-930028 enable all skipped doctests + from snowflake.snowpark.modin.pandas.indexing import _iLocIndexer + + return _iLocIndexer(self) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("loc") +@property +def loc(self): + """ + Get a group of rows and columns by label(s) or a boolean array. + """ + # TODO: SNOW-935444 fix doctest where index key has name + # TODO: SNOW-933782 fix multiindex transpose bug, e.g., Name: (cobra, mark ii) => Name: ('cobra', 'mark ii') + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas.indexing import _LocIndexer + + return _LocIndexer(self) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("iat") +@property +def iat(self, axis=None): # noqa: PR01, RT01, D200 + """ + Get a single value for a row/column pair by integer position. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas.indexing import _iAtIndexer + + return _iAtIndexer(self) + + +# Snowpark pandas uses a custom indexer object for all indexing methods. +@register_base_override("at") +@property +def at(self, axis=None): # noqa: PR01, RT01, D200 + """ + Get a single value for a row/column label pair. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.pandas.indexing import _AtIndexer + + return _AtIndexer(self) + + +# Snowpark pandas performs different dispatch logic; some changes may need to be upstreamed +# to fix edge case indexing behaviors. +@register_base_override("__getitem__") +def __getitem__(self, key): + """ + Retrieve dataset according to `key`. + + Parameters + ---------- + key : callable, scalar, slice, str or tuple + The global row index to retrieve data from. + + Returns + ------- + BasePandasDataset + Located dataset. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + key = apply_if_callable(key, self) + # If a slice is passed in, use .iloc[key]. + if isinstance(key, slice): + if (is_integer(key.start) or key.start is None) and ( + is_integer(key.stop) or key.stop is None + ): + return self.iloc[key] + else: + return self.loc[key] + + # If the object calling getitem is a Series, only use .loc[key] to filter index. + if isinstance(self, pd.Series): + return self.loc[key] + + # Sometimes the result of a callable is a DataFrame (e.g. df[df > 0]) - use where. 
+ elif isinstance(key, pd.DataFrame): + return self.where(cond=key) + + # If the object is a boolean list-like object, use .loc[key] to filter index. + # The if statement is structured this way to avoid calling dtype and reduce query count. + if isinstance(key, pd.Series): + if pandas.api.types.is_bool_dtype(key.dtype): + return self.loc[key] + elif is_list_like(key): + if hasattr(key, "dtype"): + if pandas.api.types.is_bool_dtype(key.dtype): + return self.loc[key] + if (all(is_bool(k) for k in key)) and len(key) > 0: + return self.loc[key] + + # In all other cases, use .loc[:, key] to filter columns. + return self.loc[:, key] + + +# Snowpark pandas does extra argument validation, which may need to be upstreamed. +@register_base_override("sort_values") +def sort_values( + self, + by, + axis=0, + ascending=True, + inplace: bool = False, + kind="quicksort", + na_position="last", + ignore_index: bool = False, + key: IndexKeyFunc | None = None, +): # noqa: PR01, RT01, D200 + """ + Sort by the values along either axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + axis = self._get_axis_number(axis) + inplace = validate_bool_kwarg(inplace, "inplace") + ascending = validate_ascending(ascending) + if axis == 0: + # If any column is None raise KeyError (same a native pandas). + if by is None or (isinstance(by, list) and None in by): + # Same error message as native pandas. + raise KeyError(None) + if not isinstance(by, list): + by = [by] + + # Convert 'ascending' to sequence if needed. + if not isinstance(ascending, Sequence): + ascending = [ascending] * len(by) + if len(by) != len(ascending): + # Same error message as native pandas. + raise ValueError( + f"Length of ascending ({len(ascending)})" + f" != length of by ({len(by)})" + ) + + columns = self._query_compiler.columns.values.tolist() + index_names = self._query_compiler.get_index_names() + for by_col in by: + col_count = columns.count(by_col) + index_count = index_names.count(by_col) + if col_count == 0 and index_count == 0: + # Same error message as native pandas. + raise KeyError(by_col) + if col_count and index_count: + # Same error message as native pandas. + raise ValueError( + f"'{by_col}' is both an index level and a column label, which is ambiguous." + ) + if col_count > 1: + # Same error message as native pandas. + raise ValueError(f"The column label '{by_col}' is not unique.") + + if na_position not in get_args(NaPosition): + # Same error message as native pandas for invalid 'na_position' value. + raise ValueError(f"invalid na_position: {na_position}") + result = self._query_compiler.sort_rows_by_column_values( + by, + ascending=ascending, + kind=kind, + na_position=na_position, + ignore_index=ignore_index, + key=key, + ) + else: + result = self._query_compiler.sort_columns_by_row_values( + by, + ascending=ascending, + kind=kind, + na_position=na_position, + ignore_index=ignore_index, + key=key, + ) + return self._create_or_update_from_compiler(result, inplace) + + +# Modin does not define `where` on BasePandasDataset, and defaults to pandas at the frontend +# layer for Series. +@register_base_override("where") +def where( + self, + cond: BasePandasDataset | Callable | AnyArrayLike, + other: BasePandasDataset | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, +): + """ + Replace values where the condition is False. 
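+
+    Examples
+    --------
+    Illustrative sketch only; assumes output parity with native pandas.
+
+    >>> df = pd.DataFrame({"a": [1, -2]})
+    >>> df.where(df > 0)  # doctest: +SKIP
+         a
+    0  1.0
+    1  NaN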
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: SNOW-985670: Refactor `where` and `mask` + # will move pre-processing to QC layer. + inplace = validate_bool_kwarg(inplace, "inplace") + if cond is None: + raise ValueError("Array conditional must be same shape as self") + + cond = apply_if_callable(cond, self) + + if isinstance(cond, Callable): + raise NotImplementedError("Do not support callable for 'cond' parameter.") + + from snowflake.snowpark.modin.pandas import Series + + if isinstance(cond, Series): + cond._query_compiler._shape_hint = "column" + if isinstance(self, Series): + self._query_compiler._shape_hint = "column" + if isinstance(other, Series): + other._query_compiler._shape_hint = "column" + + if not isinstance(cond, BasePandasDataset): + cond = get_as_shape_compatible_dataframe_or_series(cond, self) + cond._query_compiler._shape_hint = "array" + + if other is not None: + other = apply_if_callable(other, self) + + if isinstance(other, np.ndarray): + other = get_as_shape_compatible_dataframe_or_series( + other, + self, + shape_mismatch_message="other must be the same shape as self when an ndarray", + ) + other._query_compiler._shape_hint = "array" + + if isinstance(other, BasePandasDataset): + other = other._query_compiler + + query_compiler = self._query_compiler.where( + cond._query_compiler, + other, + axis, + level, + ) + + return self._create_or_update_from_compiler(query_compiler, inplace) + + +# Snowpark pandas performs extra argument validation, some of which should be pushed down +# to the QC layer. +@register_base_override("mask") +def mask( + self, + cond: BasePandasDataset | Callable | AnyArrayLike, + other: BasePandasDataset | Callable | Scalar | None = np.nan, + inplace: bool = False, + axis: Axis | None = None, + level: Level | None = None, +): + """ + Replace values where the condition is True. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # TODO: https://snowflakecomputing.atlassian.net/browse/SNOW-985670 + # will move pre-processing to QC layer. + inplace = validate_bool_kwarg(inplace, "inplace") + if cond is None: + raise ValueError("Array conditional must be same shape as self") + + cond = apply_if_callable(cond, self) + + if isinstance(cond, Callable): + raise NotImplementedError("Do not support callable for 'cond' parameter.") + + from snowflake.snowpark.modin.pandas import Series + + if isinstance(cond, Series): + cond._query_compiler._shape_hint = "column" + if isinstance(self, Series): + self._query_compiler._shape_hint = "column" + if isinstance(other, Series): + other._query_compiler._shape_hint = "column" + + if not isinstance(cond, BasePandasDataset): + cond = get_as_shape_compatible_dataframe_or_series(cond, self) + cond._query_compiler._shape_hint = "array" + + if other is not None: + other = apply_if_callable(other, self) + + if isinstance(other, np.ndarray): + other = get_as_shape_compatible_dataframe_or_series( + other, + self, + shape_mismatch_message="other must be the same shape as self when an ndarray", + ) + other._query_compiler._shape_hint = "array" + + if isinstance(other, BasePandasDataset): + other = other._query_compiler + + query_compiler = self._query_compiler.mask( + cond._query_compiler, + other, + axis, + level, + ) + + return self._create_or_update_from_compiler(query_compiler, inplace) + + +# Snowpark pandas uses a custom I/O dispatcher class. 
+@register_base_override("to_csv") +def to_csv( + self, + path_or_buf=None, + sep=",", + na_rep="", + float_format=None, + columns=None, + header=True, + index=True, + index_label=None, + mode="w", + encoding=None, + compression="infer", + quoting=None, + quotechar='"', + lineterminator=None, + chunksize=None, + date_format=None, + doublequote=True, + escapechar=None, + decimal=".", + errors: str = "strict", + storage_options: StorageOptions = None, +): # pragma: no cover + from snowflake.snowpark.modin.core.execution.dispatching.factories.dispatcher import ( + FactoryDispatcher, + ) + + return FactoryDispatcher.to_csv( + self._query_compiler, + path_or_buf=path_or_buf, + sep=sep, + na_rep=na_rep, + float_format=float_format, + columns=columns, + header=header, + index=index, + index_label=index_label, + mode=mode, + encoding=encoding, + compression=compression, + quoting=quoting, + quotechar=quotechar, + lineterminator=lineterminator, + chunksize=chunksize, + date_format=date_format, + doublequote=doublequote, + escapechar=escapechar, + decimal=decimal, + errors=errors, + storage_options=storage_options, + ) + + +# Modin performs extra argument validation and defaults to pandas for some edge cases. +@register_base_override("sample") +def sample( + self, + n: int | None = None, + frac: float | None = None, + replace: bool = False, + weights: str | np.ndarray | None = None, + random_state: RandomState | None = None, + axis: Axis | None = None, + ignore_index: bool = False, +): + """ + Return a random sample of items from an axis of object. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if self._get_axis_number(axis): + if weights is not None and isinstance(weights, str): + raise ValueError( + "Strings can only be passed to weights when sampling from rows on a DataFrame" + ) + else: + if n is None and frac is None: + n = 1 + elif n is not None and frac is not None: + raise ValueError("Please enter a value for `frac` OR `n`, not both") + else: + if n is not None: + if n < 0: + raise ValueError( + "A negative number of rows requested. Please provide `n` >= 0." + ) + if n % 1 != 0: + raise ValueError("Only integers accepted as `n` values") + else: + if frac < 0: + raise ValueError( + "A negative number of rows requested. Please provide `frac` >= 0." + ) + + query_compiler = self._query_compiler.sample( + n, frac, replace, weights, random_state, axis, ignore_index + ) + return self.__constructor__(query_compiler=query_compiler) + + +# Modin performs an extra query calling self.isna() to raise a warning when fill_method is unspecified. +@register_base_override("pct_change") +def pct_change( + self, periods=1, fill_method=no_default, limit=no_default, freq=None, **kwargs +): # noqa: PR01, RT01, D200 + """ + Percentage change between the current and a prior element. + """ + if fill_method not in (lib.no_default, None) or limit is not lib.no_default: + warnings.warn( + "The 'fill_method' keyword being not None and the 'limit' keyword in " + + f"{type(self).__name__}.pct_change are deprecated and will be removed " + + "in a future version. Either fill in any non-leading NA values prior " + + "to calling pct_change or specify 'fill_method=None' to not fill NA " + + "values.", + FutureWarning, + stacklevel=1, + ) + if fill_method is lib.no_default: + warnings.warn( + f"The default fill_method='pad' in {type(self).__name__}.pct_change is " + + "deprecated and will be removed in a future version. 
Either fill in any " + + "non-leading NA values prior to calling pct_change or specify 'fill_method=None' " + + "to not fill NA values.", + FutureWarning, + stacklevel=1, + ) + fill_method = "pad" + + if limit is lib.no_default: + limit = None + + if "axis" in kwargs: + kwargs["axis"] = self._get_axis_number(kwargs["axis"]) + + # Attempting to match pandas error behavior here + if not isinstance(periods, int): + raise TypeError(f"periods must be an int. got {type(periods)} instead") + + # Attempting to match pandas error behavior here + for dtype in self._get_dtypes(): + if not is_numeric_dtype(dtype): + raise TypeError( + f"cannot perform pct_change on non-numeric column with dtype {dtype}" + ) + + return self.__constructor__( + query_compiler=self._query_compiler.pct_change( + periods=periods, + fill_method=fill_method, + limit=limit, + freq=freq, + **kwargs, + ) + ) + + +# Snowpark pandas has different `copy` behavior, and some different behavior with native series arguments. +@register_base_override("astype") +def astype( + self, + dtype: str | type | pd.Series | dict[str, type], + copy: bool = True, + errors: Literal["raise", "ignore"] = "raise", +) -> pd.DataFrame | pd.Series: + """ + Cast a Modin object to a specified dtype `dtype`. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # dtype can be a series, a dict, or a scalar. If it's series or scalar, + # convert it to a dict before passing it to the query compiler. + raise_if_native_pandas_objects(dtype) + from snowflake.snowpark.modin.pandas import Series + + if isinstance(dtype, Series): + dtype = dtype.to_pandas() + if not dtype.index.is_unique: + raise ValueError( + "The new Series of types must have a unique index, i.e. " + + "it must be one-to-one mapping from column names to " + + " their new dtypes." + ) + dtype = dtype.to_dict() + # If we got a series or dict originally, dtype is a dict now. Its keys + # must be column names. + if isinstance(dtype, dict): + # Avoid materializing columns. The query compiler will handle errors where + # dtype dict includes keys that are not in columns. + col_dtypes = dtype + for col_name in col_dtypes: + if col_name not in self._query_compiler.columns: + raise KeyError( + "Only a column name can be used for the key in a dtype mappings argument. " + f"'{col_name}' not found in columns." + ) + else: + # Assume that the dtype is a scalar. + col_dtypes = {column: dtype for column in self._query_compiler.columns} + + # ensure values are pandas dtypes + col_dtypes = {k: pandas_dtype(v) for k, v in col_dtypes.items()} + new_query_compiler = self._query_compiler.astype(col_dtypes, errors=errors) + return self._create_or_update_from_compiler(new_query_compiler, not copy) + + +# Modin defaults to pandsa when `level` is specified, and has some extra axis validation that +# is guarded in newer versions. +@register_base_override("drop") +def drop( + self, + labels: IndexLabel = None, + axis: Axis = 0, + index: IndexLabel = None, + columns: IndexLabel = None, + level: Level = None, + inplace: bool = False, + errors: IgnoreRaise = "raise", +) -> BasePandasDataset | None: + """ + Drop specified labels from `BasePandasDataset`. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + inplace = validate_bool_kwarg(inplace, "inplace") + if labels is not None: + if index is not None or columns is not None: + raise ValueError("Cannot specify both 'labels' and 'index'/'columns'") + axes = {self._get_axis_number(axis): labels} + elif index is not None or columns is not None: + axes = {0: index, 1: columns} + else: + raise ValueError( + "Need to specify at least one of 'labels', 'index' or 'columns'" + ) + + for axis, labels in axes.items(): + if labels is not None: + if level is not None and not self._query_compiler.has_multiindex(axis=axis): + # Same error as native pandas. + raise AssertionError("axis must be a MultiIndex") + # According to pandas documentation, a tuple will be used as a single + # label and not treated as a list-like. + if not is_list_like(labels) or isinstance(labels, tuple): + axes[axis] = [labels] + + new_query_compiler = self._query_compiler.drop( + index=axes.get(0), columns=axes.get(1), level=level, errors=errors + ) + return self._create_or_update_from_compiler(new_query_compiler, inplace) + + +# Modin calls len(self.index) instead of a direct query compiler method. +@register_base_override("__len__") +def __len__(self) -> int: + """ + Return length of info axis. + + Returns + ------- + int + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self._query_compiler.get_axis_len(axis=0) + + +# Snowpark pandas ignores `copy`. +@register_base_override("set_axis") +def set_axis( + self, + labels: IndexLabel, + *, + axis: Axis = 0, + copy: bool | NoDefault = no_default, +): + """ + Assign desired index to given axis. + """ + # Behavior based on copy: + # ----------------------------------- + # - In native pandas, copy determines whether to create a copy of the data (not DataFrame). + # - We cannot emulate the native pandas' copy behavior in Snowpark since a copy of only data + # cannot be created -- you can only copy the whole object (DataFrame/Series). + # + # Snowpark behavior: + # ------------------ + # - copy is kept for compatibility with native pandas but is ignored. The user is warned that copy is unused. + # Warn user that copy does not do anything. + if copy is not no_default: + WarningMessage.single_warning( + message=f"{type(self).__name__}.set_axis 'copy' keyword is unused and is ignored." + ) + if labels is None: + raise TypeError("None is not a valid value for the parameter 'labels'.") + + # Determine whether to update self or a copy and perform update. + obj = self.copy() + setattr(obj, axis, labels) + return obj + + +# Modin has different behavior for empty dataframes and some slightly different length validation. +@register_base_override("describe") +def describe( + self, + percentiles: ListLike | None = None, + include: ListLike | Literal["all"] | None = None, + exclude: ListLike | None = None, +) -> BasePandasDataset: + """ + Generate descriptive statistics. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + percentiles = _refine_percentiles(percentiles) + data = self + if self._is_dataframe: + # Upstream modin lacks this check because it defaults to pandas for describing empty dataframes + if len(self.columns) == 0: + raise ValueError("Cannot describe a DataFrame without columns") + + # include/exclude are ignored for Series + if (include is None) and (exclude is None): + # when some numerics are found, keep only numerics + default_include: list[npt.DTypeLike] = [np.number] + default_include.append("datetime") + data = self.select_dtypes(include=default_include) + if len(data.columns) == 0: + data = self + elif include == "all": + if exclude is not None: + raise ValueError("exclude must be None when include is 'all'") + data = self + else: + data = self.select_dtypes( + include=include, + exclude=exclude, + ) + # Upstream modin uses data.empty, but that incurs an extra row count query + if self._is_dataframe and len(data.columns) == 0: + # Match pandas error from concatenating empty list of series descriptions. + raise ValueError("No objects to concatenate") + + return self.__constructor__( + query_compiler=data._query_compiler.describe(percentiles=percentiles) + ) + + +# Modin does type validation on self that Snowpark pandas defers to SQL. +@register_base_override("diff") +def diff(self, periods: int = 1, axis: Axis = 0): + """ + First discrete difference of element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + # We must only accept integer (or float values that are whole numbers) + # for periods. + int_periods = validate_int_kwarg(periods, "periods", float_allowed=True) + axis = self._get_axis_number(axis) + return self.__constructor__( + query_compiler=self._query_compiler.diff(axis=axis, periods=int_periods) + ) + + +# Modin does an unnecessary len call when n == 0. +@register_base_override("tail") +def tail(self, n: int = 5): + if n == 0: + return self.iloc[0:0] + return self.iloc[-n:] + + +# Snowpark pandas does extra argument validation (which should probably be deferred to SQL instead). +@register_base_override("idxmax") +def idxmax(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + """ + Return index of first occurrence of maximum over requested axis. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + dtypes = self._get_dtypes() + if ( + axis == 1 + and not numeric_only + and any(not is_numeric_dtype(d) for d in dtypes) + and len(set(dtypes)) > 1 + ): + # For numeric_only=False, if we have any non-numeric dtype, e.g. + # a string type, we need every other column to be of the same type. + # We can't compare two objects of different non-numeric types, e.g. + # a string and a timestamp. + # If we have only numeric data, we can compare columns even if they + # different types, e.g. we can compare an int column to a float + # column. + raise TypeError("'>' not supported for these dtypes") + axis = self._get_axis_number(axis) + return self._reduce_dimension( + self._query_compiler.idxmax(axis=axis, skipna=skipna, numeric_only=numeric_only) + ) + + +# Snowpark pandas does extra argument validation (which should probably be deferred to SQL instead). +@register_base_override("idxmin") +def idxmin(self, axis=0, skipna=True, numeric_only=False): # noqa: PR01, RT01, D200 + """ + Return index of first occurrence of minimum over requested axis. 
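+
+    Examples
+    --------
+    Illustrative sketch only; assumes output parity with native pandas.
+
+    >>> pd.DataFrame({"a": [3, 1], "b": [2, 4]}).idxmin()  # doctest: +SKIP
+    a    1
+    b    0
+    dtype: int64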
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + dtypes = self._get_dtypes() + if ( + axis == 1 + and not numeric_only + and any(not is_numeric_dtype(d) for d in dtypes) + and len(set(dtypes)) > 1 + ): + # For numeric_only=False, if we have any non-numeric dtype, e.g. + # a string type, we need every other column to be of the same type. + # We can't compare two objects of different non-numeric types, e.g. + # a string and a timestamp. + # If we have only numeric data, we can compare columns even if they + # different types, e.g. we can compare an int column to a float + # column. + raise TypeError("'<' not supported for these dtypes") + axis = self._get_axis_number(axis) + return self._reduce_dimension( + self._query_compiler.idxmin(axis=axis, skipna=skipna, numeric_only=numeric_only) + ) + + +# Modin does dtype validation on unary ops that Snowpark pandas does not. +@register_base_override("__abs__") +def abs(self): # noqa: RT01, D200 + """ + Return a `BasePandasDataset` with absolute numeric value of each element. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.abs()) + + +# Modin does dtype validation on unary ops that Snowpark pandas does not. +@register_base_override("__invert__") +def __invert__(self): + """ + Apply bitwise inverse to each element of the `BasePandasDataset`. + + Returns + ------- + BasePandasDataset + New BasePandasDataset containing bitwise inverse to each value. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.invert()) + + +# Modin does dtype validation on unary ops that Snowpark pandas does not. +@register_base_override("__neg__") +def __neg__(self): + """ + Change the sign for every value of self. + + Returns + ------- + BasePandasDataset + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + return self.__constructor__(query_compiler=self._query_compiler.negative()) + + +# Modin needs to add a check for mapper is not None, which changes query counts in test_concat.py +# if not present. +@register_base_override("rename_axis") +def rename_axis( + self, + mapper=lib.no_default, + *, + index=lib.no_default, + columns=lib.no_default, + axis=0, + copy=None, + inplace=False, +): # noqa: PR01, RT01, D200 + """ + Set the name of the axis for the index or columns. + """ + axes = {"index": index, "columns": columns} + + if copy is None: + copy = True + + if axis is not None: + axis = self._get_axis_number(axis) + + inplace = validate_bool_kwarg(inplace, "inplace") + + if mapper is not lib.no_default and mapper is not None: + # Use v0.23 behavior if a scalar or list + non_mapper = is_scalar(mapper) or ( + is_list_like(mapper) and not is_dict_like(mapper) + ) + if non_mapper: + return self._set_axis_name(mapper, axis=axis, inplace=inplace) + else: + raise ValueError("Use `.rename` to alter labels with a mapper.") + else: + # Use new behavior. 
Means that index and/or columns is specified + result = self if inplace else self.copy(deep=copy) + + for axis in range(self.ndim): + v = axes.get(pandas.DataFrame._get_axis_name(axis)) + if v is lib.no_default: + continue + non_mapper = is_scalar(v) or (is_list_like(v) and not is_dict_like(v)) + if non_mapper: + newnames = v + else: + + def _get_rename_function(mapper): + if isinstance(mapper, (dict, BasePandasDataset)): + + def f(x): + if x in mapper: + return mapper[x] + else: + return x + + else: + f = mapper + + return f + + f = _get_rename_function(v) + curnames = self.index.names if axis == 0 else self.columns.names + newnames = [f(name) for name in curnames] + result._set_axis_name(newnames, axis=axis, inplace=True) + if not inplace: + return result + + +# Snowpark pandas has custom dispatch logic for ufuncs, while modin defaults to pandas. +@register_base_override("__array_ufunc__") +def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): + """ + Apply the `ufunc` to the `BasePandasDataset`. + + Parameters + ---------- + ufunc : np.ufunc + The NumPy ufunc to apply. + method : str + The method to apply. + *inputs : tuple + The inputs to the ufunc. + **kwargs : dict + Additional keyword arguments. + + Returns + ------- + BasePandasDataset + The result of the ufunc applied to the `BasePandasDataset`. + """ + # Use pandas version of ufunc if it exists + if method != "__call__": + # Return sentinel value NotImplemented + return NotImplemented # pragma: no cover + from snowflake.snowpark.modin.plugin.utils.numpy_to_pandas import ( + numpy_to_pandas_universal_func_map, + ) + + if ufunc.__name__ in numpy_to_pandas_universal_func_map: + ufunc = numpy_to_pandas_universal_func_map[ufunc.__name__] + return ufunc(self, inputs[1:], kwargs) + # return the sentinel NotImplemented if we do not support this function + return NotImplemented # pragma: no cover + + +# Snowpark pandas does extra argument validation. +@register_base_override("reindex") +def reindex( + self, + index=None, + columns=None, + copy=True, + **kwargs, +): # noqa: PR01, RT01, D200 + """ + Conform `BasePandasDataset` to new index with optional filling logic. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + if kwargs.get("limit", None) is not None and kwargs.get("method", None) is None: + raise ValueError( + "limit argument only valid if doing pad, backfill or nearest reindexing" + ) + new_query_compiler = None + if index is not None: + if not isinstance(index, pandas.Index) or not index.equals(self.index): + new_query_compiler = self._query_compiler.reindex( + axis=0, labels=index, **kwargs + ) + if new_query_compiler is None: + new_query_compiler = self._query_compiler + final_query_compiler = None + if columns is not None: + if not isinstance(index, pandas.Index) or not columns.equals(self.columns): + final_query_compiler = new_query_compiler.reindex( + axis=1, labels=columns, **kwargs + ) + if final_query_compiler is None: + final_query_compiler = new_query_compiler + return self._create_or_update_from_compiler( + final_query_compiler, inplace=False if copy is None else not copy + ) + + +# No direct override annotation; used as part of `property`. +# Snowpark pandas may return a custom lazy index object. +def _get_index(self): + """ + Get the index for this DataFrame. + + Returns + ------- + pandas.Index + The union of all indexes across the partitions. 
+ """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + from snowflake.snowpark.modin.plugin.extensions.index import Index + + if self._query_compiler.is_multiindex(): + # Lazy multiindex is not supported + return self._query_compiler.index + + idx = Index(query_compiler=self._query_compiler) + idx._set_parent(self) + return idx + + +# No direct override annotation; used as part of `property`. +# Snowpark pandas may return a custom lazy index object. +def _set_index(self, new_index: Axes) -> None: + """ + Set the index for this DataFrame. + + Parameters + ---------- + new_index : pandas.Index + The new index to set this. + """ + # TODO: SNOW-1119855: Modin upgrade - modin.pandas.base.BasePandasDataset + self._update_inplace( + new_query_compiler=self._query_compiler.set_index( + [s._query_compiler for s in self._to_series_list(ensure_index(new_index))] + ) + ) + + +# Snowpark pandas may return a custom lazy index object. +register_base_override("index")(property(_get_index, _set_index)) diff --git a/src/snowflake/snowpark/modin/plugin/extensions/index.py b/src/snowflake/snowpark/modin/plugin/extensions/index.py index 95fcf684924..2682fd2b985 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/index.py @@ -29,6 +29,7 @@ import modin import numpy as np import pandas as native_pd +from modin.pandas.base import BasePandasDataset from pandas import get_option from pandas._libs import lib from pandas._libs.lib import is_list_like, is_scalar @@ -48,7 +49,6 @@ from pandas.core.dtypes.inference import is_hashable from snowflake.snowpark.modin.pandas import DataFrame, Series -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.utils import try_convert_index_to_native from snowflake.snowpark.modin.plugin._internal.telemetry import TelemetryMeta from snowflake.snowpark.modin.plugin._internal.timestamp_utils import DateTimeOrigin @@ -398,8 +398,7 @@ def values(self) -> ArrayLike: return self.to_pandas().values @property - @index_not_implemented() - def is_monotonic_increasing(self) -> None: + def is_monotonic_increasing(self) -> bool: """ Return a boolean if the values are equal or increasing. @@ -411,12 +410,20 @@ def is_monotonic_increasing(self) -> None: See Also -------- Index.is_monotonic_decreasing : Check if the values are equal or decreasing + + Examples + -------- + >>> pd.Index([1, 2, 3]).is_monotonic_increasing + True + >>> pd.Index([1, 2, 2]).is_monotonic_increasing + True + >>> pd.Index([1, 3, 2]).is_monotonic_increasing + False """ - # TODO: SNOW-1458134 implement is_monotonic_increasing + return self.to_series().is_monotonic_increasing @property - @index_not_implemented() - def is_monotonic_decreasing(self) -> None: + def is_monotonic_decreasing(self) -> bool: """ Return a boolean if the values are equal or decreasing. 
@@ -428,8 +435,17 @@ def is_monotonic_decreasing(self) -> None: See Also -------- Index.is_monotonic_increasing : Check if the values are equal or increasing + + Examples + -------- + >>> pd.Index([3, 2, 1]).is_monotonic_decreasing + True + >>> pd.Index([3, 2, 2]).is_monotonic_decreasing + True + >>> pd.Index([3, 1, 2]).is_monotonic_decreasing + False """ - # TODO: SNOW-1458134 implement is_monotonic_decreasing + return self.to_series().is_monotonic_decreasing @property def is_unique(self) -> bool: diff --git a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py index 0afea30e29a..a33d7702203 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/series_overrides.py @@ -161,6 +161,7 @@ def plot( @register_series_accessor("transform") +@snowpark_pandas_telemetry_method_decorator @series_not_implemented() def transform(self, func, axis=0, *args, **kwargs): # noqa: PR01, RT01, D200 pass # pragma: no cover diff --git a/src/snowflake/snowpark/modin/plugin/extensions/timedelta_index.py b/src/snowflake/snowpark/modin/plugin/extensions/timedelta_index.py index 86ed2a5ded4..dac1a78f740 100644 --- a/src/snowflake/snowpark/modin/plugin/extensions/timedelta_index.py +++ b/src/snowflake/snowpark/modin/plugin/extensions/timedelta_index.py @@ -130,7 +130,6 @@ def __init__( } self._init_index(data, _CONSTRUCTOR_DEFAULTS, query_compiler, **kwargs) - @timedelta_index_not_implemented() @property def days(self) -> Index: """ @@ -142,15 +141,18 @@ def days(self) -> Index: Examples -------- - >>> idx = pd.to_timedelta(["0 days", "10 days", "20 days"]) # doctest: +SKIP - >>> idx # doctest: +SKIP - TimedeltaIndex(['0 days', '10 days', '20 days'], - dtype='timedelta64[ns]', freq=None) - >>> idx.days # doctest: +SKIP + >>> idx = pd.to_timedelta(["0 days", "10 days", "20 days"]) + >>> idx + TimedeltaIndex(['0 days', '10 days', '20 days'], dtype='timedelta64[ns]', freq=None) + >>> idx.days Index([0, 10, 20], dtype='int64') """ + return Index( + query_compiler=self._query_compiler.timedelta_property( + "days", include_index=True + ) + ) - @timedelta_index_not_implemented() @property def seconds(self) -> Index: """ @@ -162,15 +164,18 @@ def seconds(self) -> Index: Examples -------- - >>> idx = pd.to_timedelta([1, 2, 3], unit='s') # doctest: +SKIP - >>> idx # doctest: +SKIP - TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'], - dtype='timedelta64[ns]', freq=None) - >>> idx.seconds # doctest: +SKIP - Index([1, 2, 3], dtype='int32') + >>> idx = pd.to_timedelta([1, 2, 3], unit='s') + >>> idx + TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03'], dtype='timedelta64[ns]', freq=None) + >>> idx.seconds + Index([1, 2, 3], dtype='int64') """ + return Index( + query_compiler=self._query_compiler.timedelta_property( + "seconds", include_index=True + ) + ) - @timedelta_index_not_implemented() @property def microseconds(self) -> Index: """ @@ -182,16 +187,20 @@ def microseconds(self) -> Index: Examples -------- - >>> idx = pd.to_timedelta([1, 2, 3], unit='us') # doctest: +SKIP - >>> idx # doctest: +SKIP + >>> idx = pd.to_timedelta([1, 2, 3], unit='us') + >>> idx TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002', '0 days 00:00:00.000003'], dtype='timedelta64[ns]', freq=None) - >>> idx.microseconds # doctest: +SKIP - Index([1, 2, 3], dtype='int32') + >>> idx.microseconds + Index([1, 2, 3], dtype='int64') """ + 
return Index( + query_compiler=self._query_compiler.timedelta_property( + "microseconds", include_index=True + ) + ) - @timedelta_index_not_implemented() @property def nanoseconds(self) -> Index: """ @@ -203,14 +212,19 @@ def nanoseconds(self) -> Index: Examples -------- - >>> idx = pd.to_timedelta([1, 2, 3], unit='ns') # doctest: +SKIP - >>> idx # doctest: +SKIP + >>> idx = pd.to_timedelta([1, 2, 3], unit='ns') + >>> idx TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002', '0 days 00:00:00.000000003'], dtype='timedelta64[ns]', freq=None) - >>> idx.nanoseconds # doctest: +SKIP - Index([1, 2, 3], dtype='int32') + >>> idx.nanoseconds + Index([1, 2, 3], dtype='int64') """ + return Index( + query_compiler=self._query_compiler.timedelta_property( + "nanoseconds", include_index=True + ) + ) @timedelta_index_not_implemented() @property diff --git a/src/snowflake/snowpark/modin/plugin/utils/frontend_constants.py b/src/snowflake/snowpark/modin/plugin/utils/frontend_constants.py new file mode 100644 index 00000000000..f2b28e8bfc1 --- /dev/null +++ b/src/snowflake/snowpark/modin/plugin/utils/frontend_constants.py @@ -0,0 +1,27 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +# Do not look up certain attributes in columns or index, as they're used for some +# special purposes, like serving remote context +# TODO: SNOW-1643986 examine whether to update upstream modin to follow this +_ATTRS_NO_LOOKUP = { + "____id_pack__", + "__name__", + "_cache", + "_ipython_canary_method_should_not_exist_", + "_ipython_display_", + "_repr_html_", + "_repr_javascript_", + "_repr_jpeg_", + "_repr_json_", + "_repr_latex_", + "_repr_markdown_", + "_repr_mimebundle_", + "_repr_pdf_", + "_repr_png_", + "_repr_svg_", + "__array_struct__", + "__array_interface__", + "_typ", +} diff --git a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py index 3da545c64b6..f673bf157bf 100644 --- a/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py +++ b/src/snowflake/snowpark/modin/plugin/utils/numpy_to_pandas.py @@ -3,8 +3,9 @@ # from typing import Any, Optional, Union +from modin.pandas.base import BasePandasDataset + import snowflake.snowpark.modin.pandas as pd -from snowflake.snowpark.modin.pandas.base import BasePandasDataset from snowflake.snowpark.modin.pandas.utils import is_scalar from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage diff --git a/src/snowflake/snowpark/modin/plugin/utils/warning_message.py b/src/snowflake/snowpark/modin/plugin/utils/warning_message.py index 80805d72ac7..8bb85d51751 100644 --- a/src/snowflake/snowpark/modin/plugin/utils/warning_message.py +++ b/src/snowflake/snowpark/modin/plugin/utils/warning_message.py @@ -51,7 +51,7 @@ def ignored_argument(cls, operation: str, argument: str, message: str) -> None: @classmethod def mismatch_with_pandas(cls, operation: str, message: str) -> None: cls.single_warning( - f"`{operation}` implementation has mismatches with pandas:\n{message}." + f"`{operation}` implementation may have mismatches with pandas:\n{message}." 
) @classmethod diff --git a/tests/integ/compiler/test_query_generator.py b/tests/integ/compiler/test_query_generator.py index 507b338d6e7..5ce4c005ad3 100644 --- a/tests/integ/compiler/test_query_generator.py +++ b/tests/integ/compiler/test_query_generator.py @@ -197,7 +197,7 @@ def test_table_create_from_large_query_breakdown(session, plan_source_generator) assert ( queries[PlanQueryType.QUERIES][0].sql - == f" CREATE TEMP TABLE {table_name} AS SELECT * FROM (select 1 as a, 2 as b)" + == f" CREATE SCOPED TEMPORARY TABLE {table_name} AS SELECT * FROM (select 1 as a, 2 as b)" ) diff --git a/tests/integ/modin/binary/test_binary_op.py b/tests/integ/modin/binary/test_binary_op.py index 9ae5db98369..cd036bcb045 100644 --- a/tests/integ/modin/binary/test_binary_op.py +++ b/tests/integ/modin/binary/test_binary_op.py @@ -2586,3 +2586,26 @@ def test_df_sub_series(): eval_snowpark_pandas_result( snow_df, native_df, lambda df: df.sub(df["two"], axis="index"), inplace=True ) + + +@sql_count_checker(query_count=2, join_count=0) +def test_binary_op_multi_series_from_same_df(): + native_df = native_pd.DataFrame( + { + "A": [1, 2, 3], + "B": [2, 3, 4], + "C": [4, 5, 6], + "D": [2, 2, 3], + }, + index=["a", "b", "c"], + ) + snow_df = pd.DataFrame(native_df) + # ensure performing more than one binary operation for series coming from same + # dataframe does not produce any join. + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: df["A"] + df["B"] + df["C"] + ) + # perform binary operations in different orders + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: (df["A"] + df["B"]) + (df["C"] + df["D"]) + ) diff --git a/tests/integ/modin/crosstab/conftest.py b/tests/integ/modin/crosstab/conftest.py new file mode 100644 index 00000000000..6203419321d --- /dev/null +++ b/tests/integ/modin/crosstab/conftest.py @@ -0,0 +1,91 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 + + +@pytest.fixture(scope="function") +def a(): + return np.array( + [ + "foo", + "foo", + "foo", + "foo", + "bar", + "bar", + "bar", + "bar", + "foo", + "foo", + "foo", + ], + dtype=object, + ) + + +@pytest.fixture(scope="function") +def b(): + return np.array( + [ + "one", + "one", + "one", + "two", + "one", + "one", + "one", + "two", + "two", + "two", + "one", + ], + dtype=object, + ) + + +@pytest.fixture(scope="function") +def c(): + return np.array( + [ + "dull", + "dull", + "shiny", + "dull", + "dull", + "shiny", + "shiny", + "dull", + "shiny", + "shiny", + "shiny", + ], + dtype=object, + ) + + +@pytest.fixture(scope="function") +def basic_crosstab_dfs(): + df = native_pd.DataFrame( + { + "species": ["dog", "cat", "dog", "dog", "cat", "cat", "dog", "cat"], + "favorite_food": [ + "chicken", + "fish", + "fish", + "beef", + "chicken", + "beef", + "fish", + "beef", + ], + "age": [7, 2, 8, 5, 9, 3, 6, 1], + } + ) + return df, pd.DataFrame(df) diff --git a/tests/integ/modin/crosstab/test_crosstab.py b/tests/integ/modin/crosstab/test_crosstab.py new file mode 100644 index 00000000000..276650519d9 --- /dev/null +++ b/tests/integ/modin/crosstab/test_crosstab.py @@ -0,0 +1,639 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +import re + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker +from tests.integ.modin.utils import eval_snowpark_pandas_result + + +@pytest.mark.parametrize("dropna", [True, False]) +class TestCrosstab: + def test_basic_crosstab_with_numpy_arrays(self, dropna, a, b, c): + query_count = 1 + join_count = 0 if dropna else 1 + with SqlCounter(query_count=query_count, join_count=join_count): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ), + ) + + def test_basic_crosstab_with_numpy_arrays_different_lengths(self, dropna, a, b, c): + a = a[:-1] + b = b[:-2] + c = c[:-3] + with SqlCounter(query_count=0): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ), + assert_exception_equal=True, + expect_exception=True, + expect_exception_match="All arrays must be of the same length", + expect_exception_type=ValueError, + ) + + # In these tests, `overlap` refers to the intersection of the indices + # of the Series objects being passed in to crosstab. crosstab takes + # only the intersection of the index objects of all Series when determining + # the final DataFrame to pass into pivot_table, so here, we are testing + # that we follow that behavior. + def test_basic_crosstab_with_series_objs_full_overlap(self, dropna, a, b, c): + # In this case, all indexes are identical - hence "full" overlap. + query_count = 2 + join_count = 5 if dropna else 10 + + def eval_func(lib): + if lib is pd: + return lib.crosstab( + a, + [lib.Series(b), lib.Series(c)], + rownames=["a"], + colnames=["b", "c"], + dropna=dropna, + ) + else: + return lib.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + + with SqlCounter(query_count=query_count, join_count=join_count): + eval_snowpark_pandas_result(pd, native_pd, eval_func) + + def test_basic_crosstab_with_series_objs_some_overlap(self, dropna, a, b, c): + # In this case, some values are shared across indexes (non-zero intersection), + # hence "some" overlap. + # When a mix of Series and non-Series objects are passed in, the non-Series + # objects are expected to have the same length as the intersection of the indexes + # of the Series objects. This test case passes because we pass in arrays that + # are the length of the intersection rather than the length of each of the Series. + query_count = 2 + join_count = 5 if dropna else 10 + b = native_pd.Series( + b, + index=list(range(len(a))), + ) + c = native_pd.Series( + c, + index=-1 * np.array(list(range(len(a)))), + ) + + # All columns have to be the same length (if NumPy arrays are present, then + # pandas errors if they do not match the length of the other Series after + # they are joined (i.e. filtered so that their indices are the same)). In + # this test, we truncate the numpy column so that the lengths are correct. 
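+        # As an illustrative sketch (the labels below are made up for this comment),
+        # native pandas would reduce Series indexed by [0, 1, 2] and [1, 2, 3] to the
+        # shared labels [1, 2] before building the pivot_table input, so any plain
+        # arrays passed alongside them must be of length 2 rather than 3.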
+ def eval_func(args_list): + a, b, c = args_list + if isinstance(b, native_pd.Series): + return native_pd.crosstab( + a[:1], [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + else: + return pd.crosstab( + a[:1], [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + + with SqlCounter(query_count=query_count, join_count=join_count): + native_args = [a, b, c] + snow_args = [a, pd.Series(b), pd.Series(c)] + eval_snowpark_pandas_result( + snow_args, + native_args, + eval_func, + ) + + @sql_count_checker(query_count=1, join_count=1) + def test_basic_crosstab_with_series_objs_some_overlap_error(self, dropna, a, b, c): + # Same as above - the intersection of the indexes of the Series objects + # is non-zero, but the indexes are not identical - hence "some" overlap. + # When a mix of Series and non-Series objects are passed in, the non-Series + # objects are expected to have the same length as the intersection of the indexes + # of the Series objects. This test case errors because we pass in arrays that + # are the length of the Series, rather than the length of the intersection of + # the indexes of the Series. + b = native_pd.Series( + b, + index=list(range(len(a))), + ) + c = native_pd.Series( + c, + index=-1 * np.array(list(range(len(a)))), + ) + + # All columns have to be the same length (if NumPy arrays are present, then + # pandas errors if they do not match the length of the other Series after + # they are joined (i.e. filtered so that their indices are the same)) + def eval_func(args_list): + a, b, c = args_list + if isinstance(b, native_pd.Series): + return native_pd.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + else: + return pd.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + + native_args = [a, b, c] + snow_args = [a, pd.Series(b), pd.Series(c)] + eval_snowpark_pandas_result( + snow_args, + native_args, + eval_func, + expect_exception=True, + expect_exception_match=re.escape( + "Length mismatch: Expected 11 rows, received array of length 1" + ), + expect_exception_type=ValueError, + assert_exception_equal=False, # Our error message is a little different. + ) + + @sql_count_checker(query_count=1, join_count=1) + def test_basic_crosstab_with_series_objs_no_overlap_error(self, dropna, a, b, c): + # In this case, no values are shared across the indexes - the intersection is an + # empty set - hence "no" overlap. We error here for the same reason as above - the + # arrays passed in should also be empty, but are non-empty. + b = native_pd.Series( + b, + index=list(range(len(a))), + ) + c = native_pd.Series( + c, + index=-1 - np.array(list(range(len(a)))), + ) + + # All columns have to be the same length (if NumPy arrays are present, then + # pandas errors if they do not match the length of the other Series after + # they are joined (i.e. 
filtered so that their indices are the same)) + def eval_func(args_list): + a, b, c = args_list + if isinstance(b, native_pd.Series): + return native_pd.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + else: + return pd.crosstab( + a, [b, c], rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + + native_args = [a, b, c] + snow_args = [a, pd.Series(b), pd.Series(c)] + eval_snowpark_pandas_result( + snow_args, + native_args, + eval_func, + expect_exception=True, + expect_exception_match=re.escape( + "Length mismatch: Expected 11 rows, received array of length 0" + ), + expect_exception_type=ValueError, + assert_exception_equal=False, # Our error message is a little different. + ) + + def test_basic_crosstab_with_df_and_series_objs_pandas_errors_columns( + self, dropna, a, b, c + ): + query_count = 4 + join_count = 1 if dropna else 3 + a = native_pd.Series( + a, + dtype=object, + ) + b = native_pd.DataFrame( + { + "0": b, + "1": c, + } + ) + # pandas expects only Series objects, or DataFrames that have only a single column, while + # we support accepting DataFrames with multiple columns. + with pytest.raises( + AssertionError, match="arrays and names must have the same length" + ): + native_pd.crosstab(a, b, rownames=["a"], colnames=["b", "c"], dropna=dropna) + + def eval_func(args_list): + a, b = args_list + if isinstance(a, native_pd.Series): + return native_pd.crosstab( + a, + [b[c] for c in b.columns], + rownames=["a"], + colnames=["b", "c"], + dropna=dropna, + ) + else: + return pd.crosstab( + a, b, rownames=["a"], colnames=["b", "c"], dropna=dropna + ) + + with SqlCounter(query_count=query_count, join_count=join_count): + native_args = [a, b] + snow_args = [pd.Series(a), pd.DataFrame(b)] + eval_snowpark_pandas_result( + snow_args, + native_args, + eval_func, + ) + + def test_basic_crosstab_with_df_and_series_objs_pandas_errors_index( + self, dropna, a, b, c + ): + query_count = 6 + join_count = 5 if dropna else 17 + a = native_pd.Series( + a, + dtype=object, + ) + b = native_pd.DataFrame( + { + "0": b, + "1": c, + } + ) + # pandas expects only Series objects, or DataFrames that have only a single column, while + # we support accepting DataFrames with multiple columns. 
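+        # To keep the comparison fair, eval_func below splits the DataFrame into its
+        # individual columns (one Series per column) before calling native_pd.crosstab,
+        # while the Snowpark pandas call receives the DataFrame unchanged.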
+ with pytest.raises( + AssertionError, match="arrays and names must have the same length" + ): + native_pd.crosstab(b, a, rownames=["a", "b"], colnames=["c"], dropna=dropna) + + def eval_func(args_list): + a, b = args_list + if isinstance(a, native_pd.Series): + return native_pd.crosstab( + [b[c] for c in b.columns], + a, + rownames=["a", "b"], + colnames=["c"], + dropna=dropna, + ) + else: + return pd.crosstab( + b, a, rownames=["a", "b"], colnames=["c"], dropna=dropna + ) + + with SqlCounter(query_count=query_count, join_count=join_count): + native_args = [a, b] + snow_args = [pd.Series(a), pd.DataFrame(b)] + eval_snowpark_pandas_result( + snow_args, + native_args, + eval_func, + ) + + def test_margins(self, dropna, a, b, c): + query_count = 1 + join_count = 1 if dropna else 2 + union_count = 1 + + with SqlCounter( + query_count=query_count, join_count=join_count, union_count=union_count + ): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab( + a, + [b, c], + rownames=["a"], + colnames=["b", "c"], + margins=True, + margins_name="MARGINS_NAME", + dropna=dropna, + ), + ) + + @pytest.mark.parametrize("normalize", [0, 1, True, "all", "index", "columns"]) + def test_normalize(self, dropna, normalize, a, b, c): + query_count = 1 if normalize in (0, "index") else 2 + join_count = 3 if normalize in (0, "index") else 2 + if dropna: + join_count -= 2 + + with SqlCounter(query_count=query_count, join_count=join_count): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab( + a, + [b, c], + rownames=["a"], + colnames=["b", "c"], + normalize=normalize, + dropna=dropna, + ), + ) + + @pytest.mark.parametrize("normalize", [0, 1, True, "all", "index", "columns"]) + def test_normalize_and_margins(self, dropna, normalize, a, b, c): + counts = { + "columns": [3, 5 if dropna else 9, 4], + "index": [1, 5 if dropna else 8, 3], + "all": [3, 12 if dropna else 19, 7], + } + counts[0] = counts["index"] + counts[1] = counts["columns"] + + if normalize is True: + sql_counts = counts["all"] + else: + sql_counts = counts[normalize] + with SqlCounter( + query_count=sql_counts[0], + join_count=sql_counts[1], + union_count=sql_counts[2], + ): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab( + a, + [b, c], + rownames=["a"], + colnames=["b", "c"], + normalize=normalize, + margins=True, + dropna=dropna, + ), + ) + + @pytest.mark.parametrize("normalize", [0, 1, "index", "columns"]) + @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"]) + def test_normalize_margins_and_values(self, dropna, normalize, aggfunc, a, b, c): + counts = { + "columns": [3, 29 if dropna else 41, 4], + "index": [1, 23 if dropna else 32, 3], + "all": [3, 54 if dropna else 75, 7], + } + counts[0] = counts["index"] + counts[1] = counts["columns"] + vals = np.array([12, 10, 9, 4, 3, 49, 19, 20, 21, 34, 0]) + if normalize is True: + sql_counts = counts["all"] + else: + sql_counts = counts[normalize] + + def eval_func(lib): + df = lib.crosstab( + a, + [b, c], + rownames=["a"], + colnames=["b", "c"], + values=vals, + normalize=normalize, + margins=True, + dropna=dropna, + aggfunc=aggfunc, + ) + if aggfunc == "sum": + # When normalizing the data, we apply the normalization function to the + # entire table (including margins), which requires us to multiply by 2 + # (since the function takes the sum over the rows, and the margins row is + # itself the sum over the rows, causing the sum over all rows to be equal + # to 2 * the sum over the input rows). 
This hack allows us to save on joins + # but results in slight precision issues. + df = df.round(decimals=6) + return df + + with SqlCounter( + query_count=sql_counts[0], + join_count=sql_counts[1], + union_count=sql_counts[2], + ): + eval_snowpark_pandas_result( + pd, + native_pd, + eval_func, + ) + + @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"]) + def test_margins_and_values(self, dropna, aggfunc, a, b, c): + vals = np.array([12, 10, 9, 4, 3, 49, 19, 20, 21, 34, 0]) + + def eval_func(lib): + df = lib.crosstab( + a, + [b, c], + rownames=["a"], + colnames=["b", "c"], + values=vals, + margins=True, + dropna=dropna, + aggfunc=aggfunc, + ) + return df + + with SqlCounter( + query_count=1, + join_count=7 if dropna else 10, + union_count=1, + ): + eval_snowpark_pandas_result( + pd, + native_pd, + eval_func, + ) + + @pytest.mark.parametrize("normalize", [0, 1, True, "all", "index", "columns"]) + @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"]) + def test_normalize_and_values(self, dropna, normalize, aggfunc, a, b, c): + counts = { + "columns": [2, 4 if dropna else 10], + "index": [1, 5 if dropna else 11], + "all": [2, 4 if dropna else 10], + } + counts[0] = counts["index"] + counts[1] = counts["columns"] + vals = np.array([12, 10, 9, 4, 3, 49, 19, 20, 21, 34, 0]) + if normalize is True: + sql_counts = counts["all"] + else: + sql_counts = counts[normalize] + + def eval_func(lib): + df = lib.crosstab( + a, + [b, c], + rownames=["a"], + colnames=["b", "c"], + values=vals, + normalize=normalize, + dropna=dropna, + aggfunc=aggfunc, + ) + if aggfunc in ["sum", "max"]: + # When normalizing the data, we apply the normalization function to the + # entire table (including margins), which requires us to multiply by 2 + # (since the function takes the sum over the rows, and the margins row is + # itself the sum over the rows, causing the sum over all rows to be equal + # to 2 * the sum over the input rows). This hack allows us to save on joins + # but results in slight precision issues. 
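+                # Rounding both results to 6 decimal places below keeps the comparison
+                # tolerant of that small floating-point error.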
+ df = df.round(decimals=6) + return df + + with SqlCounter( + query_count=sql_counts[0], + join_count=sql_counts[1], + ): + eval_snowpark_pandas_result( + pd, + native_pd, + eval_func, + ) + + @pytest.mark.parametrize("normalize", ["all", True]) + @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"]) + @sql_count_checker(query_count=0) + def test_normalize_margins_and_values_not_supported( + self, dropna, normalize, aggfunc, a, b, c + ): + vals = np.array([12, 10, 9, 4, 3, 49, 19, 20, 21, 34, 0]) + with pytest.raises( + NotImplementedError, + match='Snowpark pandas does not yet support passing in margins=True, normalize="all", and values.', + ): + pd.crosstab( + a, + [b, c], + rownames=["a"], + colnames=["b", "c"], + values=vals, + normalize=normalize, + margins=True, + dropna=dropna, + aggfunc=aggfunc, + ) + + @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"]) + def test_values(self, dropna, aggfunc, basic_crosstab_dfs): + query_count = 1 + join_count = 2 if dropna else 5 + native_df = basic_crosstab_dfs[0] + + with SqlCounter(query_count=query_count, join_count=join_count): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab( + native_df["species"].values, + native_df["favorite_food"].values, + values=native_df["age"].values, + aggfunc=aggfunc, + dropna=dropna, + ), + ) + + @pytest.mark.parametrize("aggfunc", ["count", "mean", "min", "max", "sum"]) + def test_values_series_like(self, dropna, aggfunc, basic_crosstab_dfs): + query_count = 5 + join_count = 2 if dropna else 5 + native_df, snow_df = basic_crosstab_dfs + + def eval_func(df): + if isinstance(df, pd.DataFrame): + return pd.crosstab( + df["species"], + df["favorite_food"], + values=df["age"], + aggfunc=aggfunc, + dropna=dropna, + ) + else: + return native_pd.crosstab( + df["species"], + df["favorite_food"], + values=df["age"], + aggfunc=aggfunc, + dropna=dropna, + ) + + with SqlCounter(query_count=query_count, join_count=join_count): + eval_snowpark_pandas_result( + snow_df, + native_df, + eval_func, + ) + + +@sql_count_checker(query_count=0) +def test_values_unsupported_aggfunc(basic_crosstab_dfs): + native_df = basic_crosstab_dfs[0] + + with pytest.raises( + NotImplementedError, + match="Snowpark pandas DataFrame.pivot_table does not yet support the aggregation 'median' with the given arguments.", + ): + pd.crosstab( + native_df["species"].values, + native_df["favorite_food"].values, + values=native_df["age"].values, + aggfunc="median", + dropna=False, + ) + + +@sql_count_checker(query_count=4) +def test_values_series_like_unsupported_aggfunc(basic_crosstab_dfs): + # The query count above comes from building the DataFrame + # that we pass in to pivot table. 
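    # The unsupported 'median' aggfunc is expected to be rejected with
    # NotImplementedError before any pivot/aggregation query is issued.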
+ _, snow_df = basic_crosstab_dfs + + with pytest.raises( + NotImplementedError, + match="Snowpark pandas DataFrame.pivot_table does not yet support the aggregation 'median' with the given arguments.", + ): + snow_df = pd.crosstab( + snow_df["species"], + snow_df["favorite_food"], + values=snow_df["age"], + aggfunc="median", + dropna=False, + ) + + +@sql_count_checker(query_count=0) +def test_values_aggfunc_one_supplied_should_error(a, b, c): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab(index=a, columns=b, aggfunc="sum"), + expect_exception=True, + expect_exception_match="aggfunc cannot be used without values.", + expect_exception_type=ValueError, + assert_exception_equal=True, + ) + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab(index=a, columns=b, values=c), + expect_exception=True, + expect_exception_match="values cannot be used without an aggfunc.", + expect_exception_type=ValueError, + assert_exception_equal=True, + ) + + +@sql_count_checker(query_count=0) +def test_invalid_normalize(a, b): + eval_snowpark_pandas_result( + pd, + native_pd, + lambda lib: lib.crosstab(index=a, columns=b, normalize="invalid_value"), + expect_exception=True, + expect_exception_match="Not a valid normalize argument", + expect_exception_type=ValueError, + assert_exception_equal=True, + ) diff --git a/tests/integ/modin/frame/test_astype.py b/tests/integ/modin/frame/test_astype.py index 0c1d1faa31c..8007b264b4e 100644 --- a/tests/integ/modin/frame/test_astype.py +++ b/tests/integ/modin/frame/test_astype.py @@ -126,7 +126,7 @@ def test_astype_to_timedelta(dtype): eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.astype(dtype)) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=0) def test_astype_to_timedelta_negative(): native_datetime_df = native_pd.DataFrame( data={"col1": [pd.to_datetime("2000-01-01"), pd.to_datetime("2001-01-01")]} diff --git a/tests/integ/modin/groupby/test_value_counts.py b/tests/integ/modin/groupby/test_value_counts.py new file mode 100644 index 00000000000..1f1b2f5c052 --- /dev/null +++ b/tests/integ/modin/groupby/test_value_counts.py @@ -0,0 +1,194 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +import modin.pandas as pd +import pandas as native_pd +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.sql_counter import SqlCounter, sql_count_checker +from tests.integ.modin.utils import ( + assert_snowpark_pandas_equal_to_pandas, + create_test_dfs, + eval_snowpark_pandas_result, +) + +TEST_DATA = [ + { + "by": ["c", "b", "a", "a", "b", "b", "c", "a"], + "value1": ["ee", "aa", "bb", "aa", "bb", "cc", "dd", "aa"], + "value2": [1, 2, 3, 1, 1, 3, 2, 1], + }, + { + "by": ["key 1", None, None, "key 1", "key 2", "key 1"], + "value1": [None, "value", None, None, None, "value"], + "value2": ["value", None, None, None, "value", None], + }, + # Copied from pandas docs + { + "by": ["male", "male", "female", "male", "female", "male"], + "value1": ["low", "medium", "high", "low", "high", "low"], + "value2": ["US", "FR", "US", "FR", "FR", "FR"], + }, +] + + +@pytest.mark.parametrize("test_data", TEST_DATA) +@pytest.mark.parametrize("by", ["by", ["value1", "by"], ["by", "value2"]]) +@pytest.mark.parametrize("groupby_sort", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize( + "subset", + [None, ["value1"], ["value2"], ["value1", "value2"]], +) +@pytest.mark.parametrize("dropna", [True, False]) +def test_value_counts_basic( + test_data, by, groupby_sort, sort, ascending, subset, dropna +): + by_list = by if isinstance(by, list) else [by] + value_counts_kwargs = { + "sort": sort, + "ascending": ascending, + "subset": subset, + "dropna": dropna, + } + if len(set(by_list) & set(subset or [])): + # If subset and by overlap, check for ValueError + # Unlike pandas, we do not surface label names in the error message + with SqlCounter(query_count=0): + eval_snowpark_pandas_result( + *create_test_dfs(test_data), + lambda df: df.groupby(by=by, sort=groupby_sort).value_counts( + **value_counts_kwargs + ), + expect_exception=True, + expect_exception_type=ValueError, + expect_exception_match="in subset cannot be in the groupby column keys", + assert_exception_equal=False, + ) + return + with SqlCounter(query_count=1): + none_in_by_col = any(None in test_data[col] for col in by_list) + if not dropna and none_in_by_col: + # when dropna is False, pandas gives a different result because it drops all NaN + # keys in the multiindex + # https://github.com/pandas-dev/pandas/issues/56366 + # as a workaround, replace all Nones in the pandas frame with a sentinel value + # since NaNs are sorted last, we want the sentinel to sort to the end as well + VALUE_COUNTS_TEST_SENTINEL = "zzzzzz" + snow_df, native_df = create_test_dfs(test_data) + snow_result = snow_df.groupby(by=by, sort=groupby_sort).value_counts( + **value_counts_kwargs + ) + native_df = native_df.fillna(value=VALUE_COUNTS_TEST_SENTINEL) + native_result = native_df.groupby(by=by, sort=groupby_sort).value_counts( + **value_counts_kwargs + ) + native_result.index = native_result.index.map( + lambda x: tuple( + None if i == VALUE_COUNTS_TEST_SENTINEL else i for i in x + ) + ) + assert_snowpark_pandas_equal_to_pandas(snow_result, native_result) + else: + eval_snowpark_pandas_result( + *create_test_dfs(test_data), + lambda df: df.groupby(by=by, sort=groupby_sort).value_counts( + **value_counts_kwargs + ), + ) + + +@pytest.mark.parametrize("test_data", TEST_DATA) +@pytest.mark.parametrize("by", ["by", ["value1", "by"], ["by", "value2"]]) +@pytest.mark.parametrize("groupby_sort", [True, False]) +@pytest.mark.parametrize("sort", 
[True, False]) +@pytest.mark.parametrize("ascending", [True, False]) +@pytest.mark.parametrize("normalize", [True, False]) +@sql_count_checker(query_count=1) +def test_value_counts_normalize( + test_data, by, groupby_sort, sort, ascending, normalize +): + value_counts_kwargs = { + "sort": sort, + "ascending": ascending, + "normalize": normalize, + } + # When normalize is set, pandas will (counter-intuitively) sort by the pre-normalization + # counts rather than the result proportions. This only matters if groupby_sort is False + # and sort is True. + # We work around this by using check_like=True + # See https://github.com/pandas-dev/pandas/issues/59307#issuecomment-2313767856 + check_like = not groupby_sort and sort and normalize + eval_snowpark_pandas_result( + *create_test_dfs(test_data), + lambda df: df.groupby(by=by, sort=groupby_sort).value_counts( + **value_counts_kwargs + ), + check_like=check_like, + ) + + +@pytest.mark.parametrize("test_data", TEST_DATA) +@pytest.mark.parametrize("by", ["by", ["value1", "by"], ["by", "value2"]]) +@pytest.mark.parametrize("groupby_sort", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("as_index", [True, False]) +@sql_count_checker(query_count=1) +def test_value_counts_as_index(test_data, by, groupby_sort, sort, as_index): + eval_snowpark_pandas_result( + *create_test_dfs(test_data), + lambda df: df.groupby(by=by, sort=groupby_sort, as_index=as_index).value_counts( + sort=sort + ), + ) + + +@pytest.mark.parametrize( + "subset, exception_cls", + [ + (["bad_key"], KeyError), # key not in frame + (["by"], ValueError), # subset cannot overlap with grouping columns + (["by", "bad_key"], ValueError), # subset cannot overlap with grouping columns + ], +) +def test_value_counts_bad_subset(subset, exception_cls): + # for KeyError, 1 query always runs to validate the length of the by list + with SqlCounter(query_count=1 if exception_cls is KeyError else 0): + eval_snowpark_pandas_result( + *create_test_dfs(TEST_DATA[0]), + lambda x: x.groupby(by=["by"]).value_counts(subset=subset), + expect_exception=True, + expect_exception_type=exception_cls, + assert_exception_equal=False, + ) + + +# An additional query is needed to validate the length of the by list +# A JOIN is needed to set the index to the by list +@sql_count_checker(query_count=2, join_count=1) +def test_value_counts_series(): + by = ["a", "a", "b", "b", "a", "c"] + native_ser = native_pd.Series( + [0, 0, None, 1, None, 3], + ) + snow_ser = pd.Series(native_ser) + eval_snowpark_pandas_result( + snow_ser, native_ser, lambda ser: ser.groupby(by=by).value_counts() + ) + + +# 1 query always runs to validate the length of the by list +@sql_count_checker(query_count=1) +def test_value_counts_bins_unimplemented(): + by = ["a", "a", "b", "b", "a", "c"] + native_ser = native_pd.Series( + [0, 0, None, 1, None, 3], + ) + snow_ser = pd.Series(native_ser) + with pytest.raises(NotImplementedError): + eval_snowpark_pandas_result( + snow_ser, native_ser, lambda ser: ser.groupby(by=by).value_counts(bins=3) + ) diff --git a/tests/integ/modin/index/test_monotonic.py b/tests/integ/modin/index/test_monotonic.py new file mode 100644 index 00000000000..5a15e4eb021 --- /dev/null +++ b/tests/integ/modin/index/test_monotonic.py @@ -0,0 +1,97 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
+# + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.sql_counter import sql_count_checker + + +@pytest.mark.parametrize( + "values", [[1, 2, 3], [3, 2, 1], [1, 3, 2], [1, 2, 2], [1, np.NaN, 3]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_numbers(values): + assert ( + pd.Index(values).is_monotonic_increasing + == native_pd.Index(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [[3, 2, 1], [1, 2, 3], [3, 1, 2], [2, 2, 1], [3, np.NaN, 1]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_numbers(values): + assert ( + pd.Index(values).is_monotonic_decreasing + == native_pd.Index(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", [["a", "b", "c"], ["c", "b", "a"], ["a", "c", "b"], ["ca", "cab", "cat"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_str(values): + assert ( + pd.Index(values).is_monotonic_increasing + == native_pd.Index(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [["c", "b", "a"], ["a", "b", "c"], ["c", "a", "b"], ["cat", "cab", "ca"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_str(values): + assert ( + pd.Index(values).is_monotonic_decreasing + == native_pd.Index(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[0, 2, 1]], + [ + native_pd.Timestamp("2018-01-01 00:00:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 01:20:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_dates(values): + assert ( + pd.DatetimeIndex(values).is_monotonic_increasing + == native_pd.DatetimeIndex(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[2, 0, 1]], + [ + native_pd.Timestamp("2018-01-01 01:20:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 00:00:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_dates(values): + assert ( + pd.DatetimeIndex(values).is_monotonic_decreasing + == native_pd.DatetimeIndex(values).is_monotonic_decreasing + ) diff --git a/tests/integ/modin/index/test_timedelta_index_methods.py b/tests/integ/modin/index/test_timedelta_index_methods.py index 1baafed24d2..646bd5ee983 100644 --- a/tests/integ/modin/index/test_timedelta_index_methods.py +++ b/tests/integ/modin/index/test_timedelta_index_methods.py @@ -8,6 +8,7 @@ import snowflake.snowpark.modin.plugin # noqa: F401 from tests.integ.modin.sql_counter import sql_count_checker +from tests.integ.modin.utils import assert_index_equal @sql_count_checker(query_count=3) @@ -54,12 +55,22 @@ def test_non_default_args(kwargs): pd.TimedeltaIndex(query_compiler=idx._query_compiler, **kwargs) -@pytest.mark.parametrize( - "property", ["days", "seconds", "microseconds", "nanoseconds", "inferred_freq"] -) +@pytest.mark.parametrize("property", ["components", "inferred_freq"]) @sql_count_checker(query_count=0) def test_property_not_implemented(property): snow_index = pd.TimedeltaIndex(["1 days", 
"2 days"]) msg = f"Snowpark pandas does not yet support the property TimedeltaIndex.{property}" with pytest.raises(NotImplementedError, match=msg): getattr(snow_index, property) + + +@pytest.mark.parametrize("attr", ["days", "seconds", "microseconds", "nanoseconds"]) +@sql_count_checker(query_count=1) +def test_timedelta_index_properties(attr): + native_index = native_pd.TimedeltaIndex( + ["1d", "1h", "60s", "1s", "800ms", "5us", "6ns", "1d 3s", "9m 15s 8us", None] + ) + snow_index = pd.Index(native_index) + assert_index_equal( + getattr(snow_index, attr), getattr(native_index, attr), exact=False + ) diff --git a/tests/integ/modin/series/test_astype.py b/tests/integ/modin/series/test_astype.py index 9c00e9a675d..5bbce79b01b 100644 --- a/tests/integ/modin/series/test_astype.py +++ b/tests/integ/modin/series/test_astype.py @@ -418,7 +418,7 @@ def test_astype_to_timedelta(data): ) -@sql_count_checker(query_count=2) +@sql_count_checker(query_count=0) def test_astype_to_timedelta_negative(): native_datetime_series = native_pd.Series( data=[pd.to_datetime("2000-01-01"), pd.to_datetime("2001-01-01")] diff --git a/tests/integ/modin/series/test_monotonic.py b/tests/integ/modin/series/test_monotonic.py new file mode 100644 index 00000000000..8726b9d9bd8 --- /dev/null +++ b/tests/integ/modin/series/test_monotonic.py @@ -0,0 +1,97 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +import modin.pandas as pd +import numpy as np +import pandas as native_pd +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from tests.integ.modin.sql_counter import sql_count_checker + + +@pytest.mark.parametrize( + "values", [[1, 2, 3], [3, 2, 1], [1, 3, 2], [1, 2, 2], [1, np.NaN, 3]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_numbers(values): + assert ( + pd.Series(values).is_monotonic_increasing + == native_pd.Series(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [[3, 2, 1], [1, 2, 3], [3, 1, 2], [2, 2, 1], [3, np.NaN, 1]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_numbers(values): + assert ( + pd.Series(values).is_monotonic_decreasing + == native_pd.Series(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", [["a", "b", "c"], ["c", "b", "a"], ["a", "c", "b"], ["ca", "cab", "cat"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_str(values): + assert ( + pd.Series(values).is_monotonic_increasing + == native_pd.Series(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", [["c", "b", "a"], ["a", "b", "c"], ["c", "a", "b"], ["cat", "cab", "ca"]] +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_str(values): + assert ( + pd.Series(values).is_monotonic_decreasing + == native_pd.Series(values).is_monotonic_decreasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[0, 2, 1]], + [ + native_pd.Timestamp("2018-01-01 00:00:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 01:20:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_increasing_dates(values): + assert ( + pd.Series(values).is_monotonic_increasing + == native_pd.Series(values).is_monotonic_increasing + ) + + +@pytest.mark.parametrize( + "values", + [ + native_pd.date_range(start="1/1/2018", 
end="1/03/2018").values[::-1], + native_pd.date_range(start="1/1/2018", end="1/03/2018").values, + native_pd.date_range(start="1/1/2018", end="1/03/2018").values[[2, 0, 1]], + [ + native_pd.Timestamp("2018-01-01 01:20:00"), + native_pd.NaT, + native_pd.Timestamp("2018-01-01 00:00:00"), + ], + ], +) +@sql_count_checker(query_count=1) +def test_monotonic_decreasing_dates(values): + assert ( + pd.Series(values).is_monotonic_decreasing + == native_pd.Series(values).is_monotonic_decreasing + ) diff --git a/tests/integ/modin/test_concat.py b/tests/integ/modin/test_concat.py index 7e11a3537af..c1366c22506 100644 --- a/tests/integ/modin/test_concat.py +++ b/tests/integ/modin/test_concat.py @@ -1063,3 +1063,40 @@ def test_concat_keys(): } snow_df = pd.concat(data.values(), axis=1, keys=data.keys()) assert_frame_equal(snow_df, native_df, check_dtype=False) + + +@sql_count_checker(query_count=4, join_count=0) +def test_concat_series_from_same_df(join): + num_cols = 4 + select_data = [f'{i} as "{i}"' for i in range(num_cols)] + query = f"select {', '.join(select_data)}" + + # concat today uses join_on_index to concat all series, we use + # read_snowflake here so that the default index is created and + # managed by snowpark pandas, which is the same as row position + # column. This creates a valid optimization scenario for join, where + # join performed on the same row_position column doesn't require + # actual join. + # This can not be done with pd.DataFrame constructor because the index + # and row position column is controlled by client side, which are + # different columns. + df = pd.read_snowflake(query) + + series = [df[col] for col in df.columns] + final_df = pd.concat(series, join=join, axis=1) + + assert_frame_equal(df, final_df) + + +@sql_count_checker(query_count=4, join_count=0) +def test_df_creation_from_series_from_same_df(): + num_cols = 6 + select_data = [f'{i} as "{i}"' for i in range(num_cols)] + query = f"select {', '.join(select_data)}" + + df = pd.read_snowflake(query) + + df_dict = {col: df[col] for col in df.columns} + final_df = pd.DataFrame(df_dict) + + assert_frame_equal(df, final_df) diff --git a/tests/integ/modin/test_telemetry.py b/tests/integ/modin/test_telemetry.py index 9c24c6b6853..06fbc71eec7 100644 --- a/tests/integ/modin/test_telemetry.py +++ b/tests/integ/modin/test_telemetry.py @@ -398,7 +398,7 @@ def test_telemetry_getitem_setitem(): s = df["a"] assert len(df._query_compiler.snowpark_pandas_api_calls) == 0 assert s._query_compiler.snowpark_pandas_api_calls == [ - {"name": "DataFrame.BasePandasDataset.__getitem__"} + {"name": "DataFrame.__getitem__"} ] df["a"] = 0 df["b"] = 0 @@ -412,12 +412,12 @@ def test_telemetry_getitem_setitem(): # the telemetry log from the connector to validate _ = s[0] data = _extract_snowpark_pandas_telemetry_log_data( - expected_func_name="Series.BasePandasDataset.__getitem__", + expected_func_name="Series.__getitem__", session=s._query_compiler._modin_frame.ordered_dataframe.session, ) assert data["api_calls"] == [ - {"name": "DataFrame.BasePandasDataset.__getitem__"}, - {"name": "Series.BasePandasDataset.__getitem__"}, + {"name": "DataFrame.__getitem__"}, + {"name": "Series.__getitem__"}, ] @@ -547,3 +547,18 @@ def test_telemetry_repr(): {"name": "Series.property.name_set"}, {"name": "Series.Series.__repr__"}, ] + + +@sql_count_checker(query_count=0) +def test_telemetry_copy(): + # copy() is defined in upstream modin's BasePandasDataset class, and not overridden by any + # child class or the extensions module. 
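+    # Its telemetry entry is therefore expected to appear under the
+    # BasePandasDataset name, as asserted on the copied Series below.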
+ s = pd.Series([1, 2, 3, 4]) + copied = s.copy() + assert s._query_compiler.snowpark_pandas_api_calls == [ + {"name": "Series.property.name_set"} + ] + assert copied._query_compiler.snowpark_pandas_api_calls == [ + {"name": "Series.property.name_set"}, + {"name": "Series.BasePandasDataset.copy"}, + ] diff --git a/tests/integ/modin/test_unimplemented.py b/tests/integ/modin/test_unimplemented.py index 8b1d6ef182f..deb5bce6af1 100644 --- a/tests/integ/modin/test_unimplemented.py +++ b/tests/integ/modin/test_unimplemented.py @@ -81,8 +81,6 @@ def helper(df): # unsupported methods that can only be applied on series # This set triggers SeriesDefault.register UNSUPPORTED_SERIES_METHODS = [ - (lambda se: se.is_monotonic_increasing, "property fget:is_monotonic_increasing"), - (lambda se: se.is_monotonic_decreasing, "property fget:is_monotonic_decreasing"), (lambda df: df.transform(lambda x: x + 1), "transform"), ] @@ -180,8 +178,6 @@ def test_unsupported_str_methods(func, func_name, caplog) -> None: # unsupported methods for Index UNSUPPORTED_INDEX_METHODS = [ - lambda idx: idx.is_monotonic_increasing(), - lambda idx: idx.is_monotonic_decreasing(), lambda idx: idx.nbytes(), lambda idx: idx.memory_usage(), lambda idx: idx.delete(), diff --git a/tests/integ/modin/tools/test_to_datetime.py b/tests/integ/modin/tools/test_to_datetime.py index a0ac55958a9..1ea3445d15a 100644 --- a/tests/integ/modin/tools/test_to_datetime.py +++ b/tests/integ/modin/tools/test_to_datetime.py @@ -104,7 +104,7 @@ def test_to_datetime_format(self, cache, box, format, expected): ["1/3/2000", "20000103", "%m/%d/%Y"], ], ) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=0) def test_to_datetime_format_scalar(self, cache, arg, expected, format): result = to_datetime(arg, format=format, cache=cache) expected = Timestamp(expected) @@ -120,7 +120,7 @@ def test_to_datetime_format_scalar(self, cache, arg, expected, format): def test_to_datetime_format_unimplemented(self, cache, arg, format): with pytest.raises(NotImplementedError): assert to_datetime( - arg, format=format, cache=cache + pd.Index([arg]), format=format, cache=cache ) == native_pd.to_datetime(arg, format=format, cache=cache) @pytest.mark.parametrize( @@ -135,7 +135,7 @@ def test_to_datetime_format_not_match(self, cache, arg, format): SnowparkSQLException, match=f"Can't parse '{arg}' as timestamp with format 'DD/MM/YYYY'", ): - to_datetime(arg, format=format, cache=cache) + to_datetime(pd.Index([arg]), format=format, cache=cache).to_pandas() @sql_count_checker(query_count=2, udf_count=0) def test_to_datetime_format_YYYYMMDD(self, cache): @@ -302,7 +302,7 @@ def test_to_datetime_format_YYYYMMDD_overflow(self, input, expected): @sql_count_checker(query_count=2) def test_to_datetime_with_NA(self, data, format, expected): # GH#42957 - result = to_datetime(data, format=format) + result = to_datetime(pd.Index(data), format=format) assert_index_equal(result, pd.DatetimeIndex(expected)) @sql_count_checker(query_count=1, udf_count=0) @@ -328,7 +328,7 @@ def test_to_datetime_format_integer_year_month(self, cache): result = to_datetime(ser, format="%Y%m", cache=cache) assert_series_equal(result, expected, check_index_type=False) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=0) def test_to_datetime_format_microsecond(self, cache): month_abbr = calendar.month_abbr[4] val = f"01-{month_abbr}-2011 00:00:01.978" @@ -384,7 +384,9 @@ def test_to_datetime_format_microsecond(self, cache): ) @sql_count_checker(query_count=1) def 
test_to_datetime_format_time(self, cache, value, format, dt): - assert to_datetime(value, format=format, cache=cache) == dt + assert ( + to_datetime(pd.Index([value]), format=format, cache=cache).to_pandas() == dt + ) @sql_count_checker(query_count=0) def test_to_datetime_with_non_exact_unimplemented(self, cache): @@ -407,9 +409,9 @@ def test_to_datetime_with_non_exact_unimplemented(self, cache): "2012-01-01 09:00:00.001000000", ], ) - @sql_count_checker(query_count=2) + @sql_count_checker(query_count=1, join_count=1) def test_parse_nanoseconds_with_formula(self, cache, arg): - + arg = pd.Index([arg]) # GH8989 # truncating the nanoseconds when a format was provided expected = to_datetime(arg, cache=cache) @@ -426,7 +428,10 @@ def test_parse_nanoseconds_with_formula(self, cache, arg): @sql_count_checker(query_count=0) def test_to_datetime_format_weeks(self, value, fmt, expected, cache): with pytest.raises(NotImplementedError): - assert to_datetime(value, format=fmt, cache=cache) == expected + assert ( + to_datetime(pd.Index([value]), format=fmt, cache=cache).to_pandas()[0] + == expected + ) @pytest.mark.parametrize( "fmt,dates,expected_dates", @@ -497,7 +502,7 @@ def test_to_datetime_parse_tzname_or_tzoffset_fallback( ): # GH 13486 with pytest.raises(NotImplementedError): - to_datetime(dates, format=fmt).to_list() + to_datetime(pd.Index(dates), format=fmt).to_list() @sql_count_checker(query_count=4) def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): @@ -535,7 +540,7 @@ def test_to_datetime_parse_timezone_malformed(self, offset): SnowparkSQLException, match="Can't parse|as timestamp with format 'YYYY-MM-DD HH24:MI:SS TZHTZM'", ): - to_datetime([date], format=fmt).to_pandas() + to_datetime(pd.Index([date]), format=fmt).to_pandas() @sql_count_checker(query_count=0) def test_to_datetime_parse_timezone_keeps_name(self): @@ -551,7 +556,7 @@ class TestToDatetime: def test_to_datetime_mixed_datetime_and_string(self): d1 = datetime(2020, 1, 1, 17, tzinfo=timezone(-timedelta(hours=1))) d2 = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1))) - res = to_datetime(["2020-01-01 17:00:00 -0100", d2]) + res = to_datetime(pd.Index(["2020-01-01 17:00:00 -0100", d2])) # The input will become a series with variant type and the timezone is unaware by the Snowflake engine, so the # result ignores the timezone by default expected = native_pd.DatetimeIndex( @@ -559,7 +564,7 @@ def test_to_datetime_mixed_datetime_and_string(self): ) assert_index_equal(res, expected) # Set utc=True to make sure timezone aware in to_datetime - res = to_datetime(["2020-01-01 17:00:00 -0100", d2], utc=True) + res = to_datetime(pd.Index(["2020-01-01 17:00:00 -0100", d2]), utc=True) expected = pd.DatetimeIndex([d1, d2]) assert_index_equal(res, expected) @@ -584,15 +589,15 @@ def test_to_datetime_dtarr(self, tz): @sql_count_checker(query_count=1) def test_to_datetime_pydatetime(self): - actual = to_datetime(datetime(2008, 1, 15)) + actual = to_datetime(pd.Index([datetime(2008, 1, 15)])) assert actual == np.datetime64(datetime(2008, 1, 15)) @pytest.mark.parametrize( "dt", [np.datetime64("2000-01-01"), np.datetime64("2000-01-02")] ) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=1, join_count=2) def test_to_datetime_dt64s(self, cache, dt): - assert to_datetime(dt, cache=cache) == Timestamp(dt) + assert to_datetime(pd.Index([dt]), cache=cache)[0] == Timestamp(dt) @pytest.mark.parametrize( "sample", @@ -831,11 +836,11 @@ def test_to_datetime_df_negative(self): {"arg": 1490195805433502912, 
"unit": "ns"}, ], ) - @sql_count_checker(query_count=1) + @sql_count_checker(query_count=1, join_count=2) def test_to_datetime_unit(self, sample): - assert pd.to_datetime( - sample["arg"], unit=sample["unit"] - ) == native_pd.to_datetime(sample["arg"], unit=sample["unit"]) + assert pd.to_datetime(pd.Index([sample["arg"]]), unit=sample["unit"])[ + 0 + ] == native_pd.to_datetime(sample["arg"], unit=sample["unit"]) @sql_count_checker(query_count=0) def test_to_datetime_unit_negative(self): diff --git a/tests/integ/modin/types/test_timedelta_indexing.py b/tests/integ/modin/types/test_timedelta_indexing.py index 7c8bbcb8a10..af36b319a26 100644 --- a/tests/integ/modin/types/test_timedelta_indexing.py +++ b/tests/integ/modin/types/test_timedelta_indexing.py @@ -389,3 +389,183 @@ def loc_enlargement(key, item, df): loc_enlargement(key, item, snow_td.copy()).to_pandas().dtypes, snow_td.dtypes, ) + + +@pytest.mark.parametrize( + "key, join_count", + [(2, 2), ([2, 1], 2), (slice(1, None), 0), ([True, False, False, True], 1)], +) +def test_index_get_timedelta(key, join_count): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + with SqlCounter(query_count=1, join_count=join_count): + if is_scalar(key): + assert snow_td_idx[key] == td_idx[key] + else: + eval_snowpark_pandas_result(snow_td_idx, td_idx, lambda idx: idx[key]) + + +@pytest.mark.parametrize( + "key, api, query_count, join_count", + [ + [2, "iat", 1, 2], + [native_pd.Timedelta("1 days 1 hour"), "at", 2, 2], + [[2, 1], "iloc", 1, 2], + [ + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("1 days 1 hour"), + ], + "loc", + 1, + 1, + ], + [slice(1, None), "iloc", 1, 0], + [[True, False, False, True], "iloc", 1, 1], + [[True, False, False, True], "loc", 1, 1], + ], +) +def test_series_with_timedelta_index(key, api, query_count, join_count): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + data = [1, 2, 3, 4] + native_series = native_pd.Series(data, index=td_idx) + snow_series = pd.Series(data, index=snow_td_idx) + + with SqlCounter(query_count=query_count, join_count=join_count): + if is_scalar(key): + assert getattr(snow_series, api)[key] == getattr(native_series, api)[key] + else: + eval_snowpark_pandas_result( + snow_series, native_series, lambda s: getattr(s, api)[key] + ) + + +@pytest.mark.parametrize( + "key, api, query_count, join_count", + [ + [2, "iat", 1, 2], + [native_pd.Timedelta("1 days 1 hour"), "at", 2, 2], + [[2, 1], "iloc", 1, 2], + [ + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("1 days 1 hour"), + ], + "loc", + 1, + 1, + ], + [slice(1, None), "iloc", 1, 0], + [[True, False, False, True], "iloc", 1, 1], + [[True, False, False, True], "loc", 1, 1], + ], +) +def test_df_with_timedelta_index(key, api, query_count, join_count): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + data = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + native_df = 
native_pd.DataFrame(data, index=td_idx) + snow_df = pd.DataFrame(data, index=snow_td_idx) + + with SqlCounter(query_count=query_count, join_count=join_count): + if is_scalar(key): + assert getattr(snow_df, api)[key, 0] == getattr(native_df, api)[key, 0] + else: + eval_snowpark_pandas_result( + snow_df, native_df, lambda s: getattr(s, api)[key] + ) + + +def test_df_with_timedelta_index_enlargement_during_indexing(): + td_idx = native_pd.TimedeltaIndex( + [ + native_pd.Timedelta("1 days 1 hour"), + native_pd.Timedelta("2 days 1 minute"), + native_pd.Timedelta("3 days 1 nanoseconds"), + native_pd.Timedelta("100 nanoseconds"), + ] + ) + snow_td_idx = pd.TimedeltaIndex(td_idx) + + data = [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + cols = ["a", "b", "c", "d"] + native_df = native_pd.DataFrame(data, index=td_idx, columns=cols) + snow_df = pd.DataFrame(data, index=snow_td_idx, columns=cols) + + def setitem_enlargement(key, item, df): + df[key] = item + return df + + item = 23 + + key = native_pd.Timedelta("2 days") + with SqlCounter(query_count=1, join_count=0): + eval_snowpark_pandas_result( + snow_df.copy(), + native_df.copy(), + functools.partial(setitem_enlargement, key, item), + ) + + key = native_pd.Timedelta("2 days 45 minutes") + with SqlCounter(query_count=1, join_count=1): + eval_snowpark_pandas_result( + snow_df["a"].copy(), + native_df["a"].copy(), + functools.partial(setitem_enlargement, key, item), + ) + + def loc_enlargement(key, item, df): + df.loc[key] = item + return df + + key = (slice(None, None, None), "x") + + with SqlCounter(query_count=1, join_count=0): + eval_snowpark_pandas_result( + snow_df.copy(), + native_df.copy(), + functools.partial(loc_enlargement, key, item), + ) + + key = native_pd.Timedelta("2 days 25 minutes") + with SqlCounter(query_count=1, join_count=1): + eval_snowpark_pandas_result( + snow_df["a"].copy(), + native_df["a"].copy(), + functools.partial(loc_enlargement, key, item), + ) + + # single row + key = (native_pd.Timedelta("2 days 45 minutes"), slice(None, None, None)) + + with SqlCounter(query_count=1, join_count=1): + eval_snowpark_pandas_result( + snow_df.copy(), + native_df.copy(), + functools.partial(loc_enlargement, key, item), + ) diff --git a/tests/integ/scala/test_snowflake_plan_suite.py b/tests/integ/scala/test_snowflake_plan_suite.py index 25ee097d27b..e5971e2d2f5 100644 --- a/tests/integ/scala/test_snowflake_plan_suite.py +++ b/tests/integ/scala/test_snowflake_plan_suite.py @@ -317,7 +317,7 @@ def test_create_scoped_temp_table(session): ) .queries[0] .sql - == f" CREATE TEMP TABLE {temp_table_name} AS SELECT * FROM ( SELECT * FROM ({table_name}))" + == f" CREATE TEMPORARY TABLE {temp_table_name} AS SELECT * FROM ( SELECT * FROM ({table_name}))" ) expected_sql = f' CREATE TEMPORARY TABLE {temp_table_name}("NUM" BIGINT, "STR" STRING(8))' assert expected_sql in ( @@ -342,7 +342,9 @@ def test_create_scoped_temp_table(session): .queries[0] .sql ) - expected_sql = f" CREATE TEMPORARY TABLE {temp_table_name} AS SELECT" + expected_sql = ( + f" CREATE SCOPED TEMPORARY TABLE {temp_table_name} AS SELECT" + ) assert expected_sql in ( session._plan_builder.save_as_table( table_name=[temp_table_name], diff --git a/tests/integ/test_large_query_breakdown.py b/tests/integ/test_large_query_breakdown.py index 1368bf460f2..fb4d5517b98 100644 --- a/tests/integ/test_large_query_breakdown.py +++ b/tests/integ/test_large_query_breakdown.py @@ -47,6 +47,7 @@ def setup(session): cte_optimization_enabled = session._cte_optimization_enabled 
is_query_compilation_stage_enabled = session._query_compilation_stage_enabled session._query_compilation_stage_enabled = True + session._large_query_breakdown_enabled = True yield session._query_compilation_stage_enabled = is_query_compilation_stage_enabled session._cte_optimization_enabled = cte_optimization_enabled @@ -77,11 +78,32 @@ def check_result_with_and_without_breakdown(session, df): session._large_query_breakdown_enabled = large_query_enabled +def test_no_valid_nodes_found(session, large_query_df, caplog): + """Test large query breakdown works with default bounds""" + set_bounds(300, 600) + + base_df = session.sql("select 1 as A, 2 as B") + df1 = base_df.with_column("A", col("A") + lit(1)) + df2 = base_df.with_column("B", col("B") + lit(1)) + + for i in range(102): + df1 = df1.with_column("A", col("A") + lit(i)) + df2 = df2.with_column("B", col("B") + lit(i)) + + union_df = df1.union_all(df2) + final_df = union_df.with_column("A", col("A") + lit(1)) + + with caplog.at_level(logging.DEBUG): + queries = final_df.queries + assert len(queries["queries"]) == 1, queries["queries"] + assert len(queries["post_actions"]) == 0, queries["post_actions"] + assert "Could not find a valid node for partitioning" in caplog.text + + def test_large_query_breakdown_with_cte_optimization(session): """Test large query breakdown works with cte optimized plan""" set_bounds(300, 600) session._cte_optimization_enabled = True - session._large_query_breakdown_enabled = True df0 = session.sql("select 2 as b, 32 as c") df1 = session.sql("select 1 as a, 2 as b").filter(col("a") == 1) df1 = df1.join(df0, on=["b"], how="inner") @@ -99,7 +121,7 @@ def test_large_query_breakdown_with_cte_optimization(session): check_result_with_and_without_breakdown(session, df4) assert len(df4.queries["queries"]) == 2 - assert df4.queries["queries"][0].startswith("CREATE TEMP TABLE") + assert df4.queries["queries"][0].startswith("CREATE SCOPED TEMPORARY TABLE") assert df4.queries["queries"][1].startswith("WITH SNOWPARK_TEMP_CTE_") assert len(df4.queries["post_actions"]) == 1 @@ -108,14 +130,13 @@ def test_large_query_breakdown_with_cte_optimization(session): def test_save_as_table(session, large_query_df): set_bounds(300, 600) - session._large_query_breakdown_enabled = True table_name = Utils.random_table_name() with session.query_history() as history: large_query_df.write.save_as_table(table_name, mode="overwrite") assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith( f"CREATE OR REPLACE TABLE {table_name}" ) @@ -135,7 +156,7 @@ def test_update_delete_merge(session, large_query_df): t.update({"B": 0}, t.a == large_query_df.a, large_query_df) assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"UPDATE {table_name}") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") @@ -144,7 +165,7 @@ def test_update_delete_merge(session, large_query_df): t.delete(t.a == large_query_df.a, large_query_df) assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert 
history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"DELETE FROM {table_name} USING") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") @@ -157,14 +178,13 @@ def test_update_delete_merge(session, large_query_df): ) assert len(history.queries) == 4 assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"MERGE INTO {table_name} USING") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") def test_copy_into_location(session, large_query_df): set_bounds(300, 600) - session._large_query_breakdown_enabled = True remote_file_path = f"{session.get_session_stage()}/df.parquet" with session.query_history() as history: large_query_df.write.copy_into_location( @@ -176,14 +196,13 @@ def test_copy_into_location(session, large_query_df): ) assert len(history.queries) == 4, history.queries assert history.queries[0].sql_text == "SELECT CURRENT_TRANSACTION()" - assert history.queries[1].sql_text.startswith("CREATE TEMP TABLE") + assert history.queries[1].sql_text.startswith("CREATE SCOPED TEMPORARY TABLE") assert history.queries[2].sql_text.startswith(f"COPY INTO '{remote_file_path}'") assert history.queries[3].sql_text.startswith("DROP TABLE If EXISTS") def test_pivot_unpivot(session): set_bounds(300, 600) - session._large_query_breakdown_enabled = True session.sql( """create or replace temp table monthly_sales(A int, B int, month text) as select * from values @@ -215,7 +234,7 @@ def test_pivot_unpivot(session): plan_queries = final_df.queries assert len(plan_queries["queries"]) == 2 - assert plan_queries["queries"][0].startswith("CREATE TEMP TABLE") + assert plan_queries["queries"][0].startswith("CREATE SCOPED TEMPORARY TABLE") assert len(plan_queries["post_actions"]) == 1 assert plan_queries["post_actions"][0].startswith("DROP TABLE If EXISTS") @@ -223,7 +242,6 @@ def test_pivot_unpivot(session): def test_sort(session): set_bounds(300, 600) - session._large_query_breakdown_enabled = True base_df = session.sql("select 1 as A, 2 as B") df1 = base_df.with_column("A", col("A") + lit(1)) df2 = base_df.with_column("B", col("B") + lit(1)) @@ -239,7 +257,7 @@ def test_sort(session): plan_queries = final_df.queries assert len(plan_queries["queries"]) == 2 - assert plan_queries["queries"][0].startswith("CREATE TEMP TABLE") + assert plan_queries["queries"][0].startswith("CREATE SCOPED TEMPORARY TABLE") assert len(plan_queries["post_actions"]) == 1 assert plan_queries["post_actions"][0].startswith("DROP TABLE If EXISTS") @@ -258,7 +276,6 @@ def test_sort(session): def test_multiple_query_plan(session, large_query_df): set_bounds(300, 600) original_threshold = analyzer.ARRAY_BIND_THRESHOLD - session._large_query_breakdown_enabled = True try: analyzer.ARRAY_BIND_THRESHOLD = 2 base_df = session.create_dataframe([[1, 2], [3, 4]], schema=["A", "B"]) @@ -283,7 +300,7 @@ def test_multiple_query_plan(session, large_query_df): "CREATE OR REPLACE SCOPED TEMPORARY TABLE" ) assert plan_queries["queries"][1].startswith("INSERT INTO") - assert plan_queries["queries"][2].startswith("CREATE TEMP TABLE") + assert plan_queries["queries"][2].startswith("CREATE SCOPED TEMPORARY TABLE") assert len(plan_queries["post_actions"]) == 2 for query in 
plan_queries["post_actions"]: @@ -296,7 +313,6 @@ def test_multiple_query_plan(session, large_query_df): def test_optimization_skipped_with_transaction(session, large_query_df, caplog): """Test large query breakdown is skipped when transaction is enabled""" set_bounds(300, 600) - session._large_query_breakdown_enabled = True session.sql("begin").collect() assert Utils.is_active_transaction(session) with caplog.at_level(logging.DEBUG): @@ -316,7 +332,6 @@ def test_optimization_skipped_with_views_and_dynamic_tables(session, caplog): source_table = Utils.random_table_name() table_name = Utils.random_table_name() view_name = Utils.random_view_name() - session._large_query_breakdown_enabled = True try: session.sql("select 1 as a, 2 as b").write.save_as_table(source_table) df = session.table(source_table) @@ -344,12 +359,13 @@ def test_optimization_skipped_with_views_and_dynamic_tables(session, caplog): def test_async_job_with_large_query_breakdown(session, large_query_df): """Test large query breakdown gives same result for async and non-async jobs""" set_bounds(300, 600) - session._large_query_breakdown_enabled = True job = large_query_df.collect(block=False) result = job.result() assert result == large_query_df.collect() assert len(large_query_df.queries["queries"]) == 2 - assert large_query_df.queries["queries"][0].startswith("CREATE TEMP TABLE") + assert large_query_df.queries["queries"][0].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) assert len(large_query_df.queries["post_actions"]) == 1 assert large_query_df.queries["post_actions"][0].startswith( @@ -362,20 +378,24 @@ def test_complexity_bounds_affect_num_partitions(session, large_query_df): Also test that when partitions are added, drop table queries are added. """ set_bounds(300, 600) - session._large_query_breakdown_enabled = True assert len(large_query_df.queries["queries"]) == 2 assert len(large_query_df.queries["post_actions"]) == 1 - assert large_query_df.queries["queries"][0].startswith("CREATE TEMP TABLE") + assert large_query_df.queries["queries"][0].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) assert large_query_df.queries["post_actions"][0].startswith( "DROP TABLE If EXISTS" ) set_bounds(300, 412) - session._large_query_breakdown_enabled = True assert len(large_query_df.queries["queries"]) == 3 assert len(large_query_df.queries["post_actions"]) == 2 - assert large_query_df.queries["queries"][0].startswith("CREATE TEMP TABLE") - assert large_query_df.queries["queries"][1].startswith("CREATE TEMP TABLE") + assert large_query_df.queries["queries"][0].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) + assert large_query_df.queries["queries"][1].startswith( + "CREATE SCOPED TEMPORARY TABLE" + ) assert large_query_df.queries["post_actions"][0].startswith( "DROP TABLE If EXISTS" ) diff --git a/tests/notebooks/modin/MIMICHealthcareDemo.ipynb b/tests/notebooks/modin/MIMICHealthcareDemo.ipynb index 95a75e3c858..52388fe7ddd 100644 --- a/tests/notebooks/modin/MIMICHealthcareDemo.ipynb +++ b/tests/notebooks/modin/MIMICHealthcareDemo.ipynb @@ -34,10 +34,10 @@ "id": "90243e71-4cf0-4971-a95e-3f29e12449fc", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:35.536214Z", - "iopub.status.busy": "2024-08-28T17:27:35.535897Z", - "iopub.status.idle": "2024-08-28T17:27:36.977905Z", - "shell.execute_reply": "2024-08-28T17:27:36.977472Z" + "iopub.execute_input": "2024-08-29T20:52:59.781777Z", + "iopub.status.busy": "2024-08-29T20:52:59.781651Z", + "iopub.status.idle": "2024-08-29T20:53:01.465309Z", + 
"shell.execute_reply": "2024-08-29T20:53:01.464055Z" }, "tags": [] }, @@ -70,10 +70,10 @@ "id": "c309356f-14f8-469a-9257-b944b8951410", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:36.980268Z", - "iopub.status.busy": "2024-08-28T17:27:36.980102Z", - "iopub.status.idle": "2024-08-28T17:27:45.691050Z", - "shell.execute_reply": "2024-08-28T17:27:45.690724Z" + "iopub.execute_input": "2024-08-29T20:53:01.474913Z", + "iopub.status.busy": "2024-08-29T20:53:01.473383Z", + "iopub.status.idle": "2024-08-29T20:53:09.493517Z", + "shell.execute_reply": "2024-08-29T20:53:09.491938Z" }, "tags": [] }, @@ -97,10 +97,10 @@ "id": "68823bb5-fcd1-4f92-b767-e5ac83dc3df7", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:45.693385Z", - "iopub.status.busy": "2024-08-28T17:27:45.693251Z", - "iopub.status.idle": "2024-08-28T17:27:46.018818Z", - "shell.execute_reply": "2024-08-28T17:27:46.018231Z" + "iopub.execute_input": "2024-08-29T20:53:09.501294Z", + "iopub.status.busy": "2024-08-29T20:53:09.500816Z", + "iopub.status.idle": "2024-08-29T20:53:10.389392Z", + "shell.execute_reply": "2024-08-29T20:53:10.388512Z" }, "tags": [] }, @@ -145,10 +145,10 @@ "id": "9a7fc3b9-50db-49da-a18a-8865a3356f31", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:46.022960Z", - "iopub.status.busy": "2024-08-28T17:27:46.022736Z", - "iopub.status.idle": "2024-08-28T17:27:49.916885Z", - "shell.execute_reply": "2024-08-28T17:27:49.916624Z" + "iopub.execute_input": "2024-08-29T20:53:10.397141Z", + "iopub.status.busy": "2024-08-29T20:53:10.396693Z", + "iopub.status.idle": "2024-08-29T20:53:18.519633Z", + "shell.execute_reply": "2024-08-29T20:53:18.518329Z" }, "tags": [] }, @@ -331,10 +331,10 @@ "id": "7692a0af-de2f-42d1-9110-15ce104c2c5c", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:49.918782Z", - "iopub.status.busy": "2024-08-28T17:27:49.918678Z", - "iopub.status.idle": "2024-08-28T17:27:50.561066Z", - "shell.execute_reply": "2024-08-28T17:27:50.560658Z" + "iopub.execute_input": "2024-08-29T20:53:18.525954Z", + "iopub.status.busy": "2024-08-29T20:53:18.525686Z", + "iopub.status.idle": "2024-08-29T20:53:19.864003Z", + "shell.execute_reply": "2024-08-29T20:53:19.863649Z" }, "tags": [] }, @@ -390,10 +390,10 @@ "id": "5344da61-915d-43cf-894a-484876450748", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:50.563582Z", - "iopub.status.busy": "2024-08-28T17:27:50.563395Z", - "iopub.status.idle": "2024-08-28T17:27:50.768782Z", - "shell.execute_reply": "2024-08-28T17:27:50.768309Z" + "iopub.execute_input": "2024-08-29T20:53:19.866177Z", + "iopub.status.busy": "2024-08-29T20:53:19.866034Z", + "iopub.status.idle": "2024-08-29T20:53:20.363772Z", + "shell.execute_reply": "2024-08-29T20:53:20.363248Z" } }, "outputs": [ @@ -401,8 +401,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation has mismatches with pandas:\n", - "Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. In this case Snowflake's auto format may yield different result values compared to pandas..\n" + "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation may have mismatches with pandas:\n", + "Snowflake automatic format detection is used when a format is not provided. 
In this case Snowflake's auto format may yield different result values compared to pandas. See https://docs.snowflake.com/en/sql-reference/date-time-input-output#supported-formats-for-auto-detection for details.\n" ] } ], @@ -428,10 +428,10 @@ "id": "5f72ca6b-ae9a-4a68-a391-83b065785004", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:50.770869Z", - "iopub.status.busy": "2024-08-28T17:27:50.770722Z", - "iopub.status.idle": "2024-08-28T17:27:50.888703Z", - "shell.execute_reply": "2024-08-28T17:27:50.888387Z" + "iopub.execute_input": "2024-08-29T20:53:20.366126Z", + "iopub.status.busy": "2024-08-29T20:53:20.365983Z", + "iopub.status.idle": "2024-08-29T20:53:20.562742Z", + "shell.execute_reply": "2024-08-29T20:53:20.562425Z" }, "tags": [] }, @@ -446,10 +446,10 @@ "id": "ecc19928-1d3a-49b8-bc0d-4270e53bfc4c", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:50.890752Z", - "iopub.status.busy": "2024-08-28T17:27:50.890619Z", - "iopub.status.idle": "2024-08-28T17:27:51.188395Z", - "shell.execute_reply": "2024-08-28T17:27:51.188083Z" + "iopub.execute_input": "2024-08-29T20:53:20.565128Z", + "iopub.status.busy": "2024-08-29T20:53:20.564972Z", + "iopub.status.idle": "2024-08-29T20:53:21.237178Z", + "shell.execute_reply": "2024-08-29T20:53:21.236687Z" } }, "outputs": [], @@ -471,10 +471,10 @@ "id": "50c62f3f-a804-4efd-89bb-cf689a870055", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:51.190704Z", - "iopub.status.busy": "2024-08-28T17:27:51.190570Z", - "iopub.status.idle": "2024-08-28T17:27:51.563926Z", - "shell.execute_reply": "2024-08-28T17:27:51.563299Z" + "iopub.execute_input": "2024-08-29T20:53:21.239635Z", + "iopub.status.busy": "2024-08-29T20:53:21.239465Z", + "iopub.status.idle": "2024-08-29T20:53:21.790821Z", + "shell.execute_reply": "2024-08-29T20:53:21.790376Z" }, "tags": [] }, @@ -499,10 +499,10 @@ "id": "66ac1e04-4581-4292-8b7a-b88faa76edf5", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:51.567168Z", - "iopub.status.busy": "2024-08-28T17:27:51.566843Z", - "iopub.status.idle": "2024-08-28T17:27:52.325449Z", - "shell.execute_reply": "2024-08-28T17:27:52.325162Z" + "iopub.execute_input": "2024-08-29T20:53:21.793183Z", + "iopub.status.busy": "2024-08-29T20:53:21.793045Z", + "iopub.status.idle": "2024-08-29T20:53:23.191350Z", + "shell.execute_reply": "2024-08-29T20:53:23.191053Z" }, "tags": [] }, @@ -569,10 +569,10 @@ "id": "17b76fe7-4d6d-4eb4-bebe-55cc643b69f3", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:52.327478Z", - "iopub.status.busy": "2024-08-28T17:27:52.327310Z", - "iopub.status.idle": "2024-08-28T17:27:55.549227Z", - "shell.execute_reply": "2024-08-28T17:27:55.548770Z" + "iopub.execute_input": "2024-08-29T20:53:23.201474Z", + "iopub.status.busy": "2024-08-29T20:53:23.201235Z", + "iopub.status.idle": "2024-08-29T20:53:27.315733Z", + "shell.execute_reply": "2024-08-29T20:53:27.314718Z" }, "tags": [] }, @@ -595,10 +595,10 @@ "id": "8514feca-f6b3-4186-bd32-ef07ba8efed4", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:55.552055Z", - "iopub.status.busy": "2024-08-28T17:27:55.551878Z", - "iopub.status.idle": "2024-08-28T17:27:55.941773Z", - "shell.execute_reply": "2024-08-28T17:27:55.941284Z" + "iopub.execute_input": "2024-08-29T20:53:27.325717Z", + "iopub.status.busy": "2024-08-29T20:53:27.324858Z", + "iopub.status.idle": "2024-08-29T20:53:28.100711Z", + "shell.execute_reply": "2024-08-29T20:53:28.099954Z" }, "tags": [] }, @@ -613,10 +613,10
@@ "id": "bf8025c3-8657-41a7-8feb-6afab251ccfd", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:55.944225Z", - "iopub.status.busy": "2024-08-28T17:27:55.944051Z", - "iopub.status.idle": "2024-08-28T17:27:56.081283Z", - "shell.execute_reply": "2024-08-28T17:27:56.080891Z" + "iopub.execute_input": "2024-08-29T20:53:28.106124Z", + "iopub.status.busy": "2024-08-29T20:53:28.105739Z", + "iopub.status.idle": "2024-08-29T20:53:28.469703Z", + "shell.execute_reply": "2024-08-29T20:53:28.469403Z" }, "tags": [] }, @@ -642,10 +642,10 @@ "id": "60ba61f7-fa60-4a6d-8b06-1282d2f64382", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:56.083562Z", - "iopub.status.busy": "2024-08-28T17:27:56.083425Z", - "iopub.status.idle": "2024-08-28T17:27:56.085148Z", - "shell.execute_reply": "2024-08-28T17:27:56.084867Z" + "iopub.execute_input": "2024-08-29T20:53:28.471863Z", + "iopub.status.busy": "2024-08-29T20:53:28.471732Z", + "iopub.status.idle": "2024-08-29T20:53:28.473440Z", + "shell.execute_reply": "2024-08-29T20:53:28.473133Z" }, "tags": [] }, @@ -661,10 +661,10 @@ "id": "5cdeb9af-660a-4daa-98c5-f9e86699e9bd", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:56.086699Z", - "iopub.status.busy": "2024-08-28T17:27:56.086595Z", - "iopub.status.idle": "2024-08-28T17:27:57.259523Z", - "shell.execute_reply": "2024-08-28T17:27:57.259142Z" + "iopub.execute_input": "2024-08-29T20:53:28.475189Z", + "iopub.status.busy": "2024-08-29T20:53:28.475066Z", + "iopub.status.idle": "2024-08-29T20:53:30.036155Z", + "shell.execute_reply": "2024-08-29T20:53:30.035460Z" }, "tags": [] }, @@ -704,10 +704,10 @@ "id": "2b704957-4b20-41a9-abbb-1d963a0ea0d2", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:57.261881Z", - "iopub.status.busy": "2024-08-28T17:27:57.261730Z", - "iopub.status.idle": "2024-08-28T17:27:57.985080Z", - "shell.execute_reply": "2024-08-28T17:27:57.984756Z" + "iopub.execute_input": "2024-08-29T20:53:30.042861Z", + "iopub.status.busy": "2024-08-29T20:53:30.042419Z", + "iopub.status.idle": "2024-08-29T20:53:30.738218Z", + "shell.execute_reply": "2024-08-29T20:53:30.736870Z" }, "tags": [] }, @@ -754,10 +754,10 @@ "id": "1748639f-04b5-45e6-b836-2433b66fa29d", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:57.986888Z", - "iopub.status.busy": "2024-08-28T17:27:57.986758Z", - "iopub.status.idle": "2024-08-28T17:27:59.498296Z", - "shell.execute_reply": "2024-08-28T17:27:59.498013Z" + "iopub.execute_input": "2024-08-29T20:53:30.743369Z", + "iopub.status.busy": "2024-08-29T20:53:30.743214Z", + "iopub.status.idle": "2024-08-29T20:53:32.247987Z", + "shell.execute_reply": "2024-08-29T20:53:32.245175Z" }, "tags": [] }, @@ -799,10 +799,10 @@ "id": "24a34764-f442-4cc1-8b87-ed96ace34651", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:59.500159Z", - "iopub.status.busy": "2024-08-28T17:27:59.500025Z", - "iopub.status.idle": "2024-08-28T17:28:00.076867Z", - "shell.execute_reply": "2024-08-28T17:28:00.076522Z" + "iopub.execute_input": "2024-08-29T20:53:32.251992Z", + "iopub.status.busy": "2024-08-29T20:53:32.251663Z", + "iopub.status.idle": "2024-08-29T20:53:33.298148Z", + "shell.execute_reply": "2024-08-29T20:53:33.297812Z" }, "tags": [] }, @@ -834,10 +834,10 @@ "id": "96753257-acd4-4ba9-b81b-19dc0a2af53c", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:00.079097Z", - "iopub.status.busy": "2024-08-28T17:28:00.078936Z", - "iopub.status.idle": "2024-08-28T17:28:00.081233Z", 
- "shell.execute_reply": "2024-08-28T17:28:00.080958Z" + "iopub.execute_input": "2024-08-29T20:53:33.300115Z", + "iopub.status.busy": "2024-08-29T20:53:33.299993Z", + "iopub.status.idle": "2024-08-29T20:53:33.302005Z", + "shell.execute_reply": "2024-08-29T20:53:33.301532Z" }, "tags": [] }, @@ -871,10 +871,10 @@ "id": "2d26eee2-671a-4ff8-ac22-62612c1a1ced", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:00.083739Z", - "iopub.status.busy": "2024-08-28T17:28:00.083616Z", - "iopub.status.idle": "2024-08-28T17:28:00.944153Z", - "shell.execute_reply": "2024-08-28T17:28:00.943809Z" + "iopub.execute_input": "2024-08-29T20:53:33.315185Z", + "iopub.status.busy": "2024-08-29T20:53:33.315012Z", + "iopub.status.idle": "2024-08-29T20:53:34.722137Z", + "shell.execute_reply": "2024-08-29T20:53:34.721832Z" }, "tags": [] }, @@ -916,10 +916,10 @@ "id": "21aef8ae-47d8-4c77-8e04-270304c41d4e", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:00.946550Z", - "iopub.status.busy": "2024-08-28T17:28:00.946409Z", - "iopub.status.idle": "2024-08-28T17:28:02.622587Z", - "shell.execute_reply": "2024-08-28T17:28:02.622199Z" + "iopub.execute_input": "2024-08-29T20:53:34.724622Z", + "iopub.status.busy": "2024-08-29T20:53:34.724479Z", + "iopub.status.idle": "2024-08-29T20:53:37.680974Z", + "shell.execute_reply": "2024-08-29T20:53:37.680658Z" }, "tags": [] }, @@ -958,10 +958,10 @@ "id": "2d11b951-5b4c-4a98-ae4c-883fbccd56a7", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:02.624633Z", - "iopub.status.busy": "2024-08-28T17:28:02.624483Z", - "iopub.status.idle": "2024-08-28T17:28:02.933061Z", - "shell.execute_reply": "2024-08-28T17:28:02.932626Z" + "iopub.execute_input": "2024-08-29T20:53:37.683001Z", + "iopub.status.busy": "2024-08-29T20:53:37.682857Z", + "iopub.status.idle": "2024-08-29T20:53:38.194095Z", + "shell.execute_reply": "2024-08-29T20:53:38.193778Z" }, "tags": [] }, @@ -977,10 +977,10 @@ "id": "35155531-c8ff-4ed1-9a3e-e457176f9f20", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:02.935312Z", - "iopub.status.busy": "2024-08-28T17:28:02.935204Z", - "iopub.status.idle": "2024-08-28T17:28:04.421197Z", - "shell.execute_reply": "2024-08-28T17:28:04.420876Z" + "iopub.execute_input": "2024-08-29T20:53:38.196253Z", + "iopub.status.busy": "2024-08-29T20:53:38.196113Z", + "iopub.status.idle": "2024-08-29T20:53:40.521366Z", + "shell.execute_reply": "2024-08-29T20:53:40.520687Z" }, "tags": [] }, @@ -1139,10 +1139,10 @@ "id": "b8c41494-755a-485b-8119-9dfff98213df", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:04.423275Z", - "iopub.status.busy": "2024-08-28T17:28:04.423133Z", - "iopub.status.idle": "2024-08-28T17:28:05.150707Z", - "shell.execute_reply": "2024-08-28T17:28:05.150379Z" + "iopub.execute_input": "2024-08-29T20:53:40.525624Z", + "iopub.status.busy": "2024-08-29T20:53:40.525457Z", + "iopub.status.idle": "2024-08-29T20:53:41.863763Z", + "shell.execute_reply": "2024-08-29T20:53:41.862737Z" }, "tags": [] }, @@ -1192,10 +1192,10 @@ "metadata": { "collapsed": false, "execution": { - "iopub.execute_input": "2024-08-28T17:28:05.152844Z", - "iopub.status.busy": "2024-08-28T17:28:05.152694Z", - "iopub.status.idle": "2024-08-28T17:28:07.249455Z", - "shell.execute_reply": "2024-08-28T17:28:07.248760Z" + "iopub.execute_input": "2024-08-29T20:53:41.868179Z", + "iopub.status.busy": "2024-08-29T20:53:41.868013Z", + "iopub.status.idle": "2024-08-29T20:53:45.387411Z", + "shell.execute_reply": 
"2024-08-29T20:53:45.386513Z" }, "jupyter": { "outputs_hidden": false @@ -1212,10 +1212,10 @@ "id": "719049a4-0a5b-45da-bbd5-8ff073c95a93", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.253088Z", - "iopub.status.busy": "2024-08-28T17:28:07.252690Z", - "iopub.status.idle": "2024-08-28T17:28:07.972094Z", - "shell.execute_reply": "2024-08-28T17:28:07.971764Z" + "iopub.execute_input": "2024-08-29T20:53:45.394651Z", + "iopub.status.busy": "2024-08-29T20:53:45.394283Z", + "iopub.status.idle": "2024-08-29T20:53:48.805198Z", + "shell.execute_reply": "2024-08-29T20:53:48.804855Z" }, "tags": [] }, @@ -1240,10 +1240,10 @@ "id": "9e1f2052-7405-496c-b4de-76e031978cb5", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.974120Z", - "iopub.status.busy": "2024-08-28T17:28:07.973931Z", - "iopub.status.idle": "2024-08-28T17:28:07.979210Z", - "shell.execute_reply": "2024-08-28T17:28:07.978966Z" + "iopub.execute_input": "2024-08-29T20:53:48.807541Z", + "iopub.status.busy": "2024-08-29T20:53:48.807331Z", + "iopub.status.idle": "2024-08-29T20:53:48.813010Z", + "shell.execute_reply": "2024-08-29T20:53:48.812749Z" }, "tags": [] }, @@ -1678,10 +1678,10 @@ "id": "dcb50a0d-3f66-4376-a383-597789f83fa0", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.980853Z", - "iopub.status.busy": "2024-08-28T17:28:07.980748Z", - "iopub.status.idle": "2024-08-28T17:28:07.983038Z", - "shell.execute_reply": "2024-08-28T17:28:07.982709Z" + "iopub.execute_input": "2024-08-29T20:53:48.824720Z", + "iopub.status.busy": "2024-08-29T20:53:48.824554Z", + "iopub.status.idle": "2024-08-29T20:53:48.827307Z", + "shell.execute_reply": "2024-08-29T20:53:48.826940Z" }, "tags": [] }, @@ -1704,10 +1704,10 @@ "id": "4993b18c-7d2a-49b6-96f5-b4a7c6a38cc2", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:07.984580Z", - "iopub.status.busy": "2024-08-28T17:28:07.984481Z", - "iopub.status.idle": "2024-08-28T17:28:08.048040Z", - "shell.execute_reply": "2024-08-28T17:28:08.047710Z" + "iopub.execute_input": "2024-08-29T20:53:48.829359Z", + "iopub.status.busy": "2024-08-29T20:53:48.829219Z", + "iopub.status.idle": "2024-08-29T20:53:48.899436Z", + "shell.execute_reply": "2024-08-29T20:53:48.899061Z" }, "tags": [] }, @@ -1715,7 +1715,7 @@ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 29, @@ -1747,10 +1747,10 @@ "id": "0c6fde6b-1126-4625-9c6e-7223eb97c30b", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:28:08.049898Z", - "iopub.status.busy": "2024-08-28T17:28:08.049780Z", - "iopub.status.idle": "2024-08-28T17:28:08.052183Z", - "shell.execute_reply": "2024-08-28T17:28:08.051914Z" + "iopub.execute_input": "2024-08-29T20:53:48.903896Z", + "iopub.status.busy": "2024-08-29T20:53:48.903763Z", + "iopub.status.idle": "2024-08-29T20:53:48.906387Z", + "shell.execute_reply": "2024-08-29T20:53:48.906031Z" }, "tags": [] }, diff --git a/tests/notebooks/modin/TimeSeriesTesting.ipynb b/tests/notebooks/modin/TimeSeriesTesting.ipynb index b21dc046b66..e1d7ac54fec 100644 --- a/tests/notebooks/modin/TimeSeriesTesting.ipynb +++ b/tests/notebooks/modin/TimeSeriesTesting.ipynb @@ -3,9 +3,16 @@ { "cell_type": "markdown", "id": "143e5d4a-ca70-4ac8-a61e-be7c93c17d20", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "# Snowpark pandas Time series / date functionality" + "# Snowpark pandas Time series / date functionality\n", + "See 
https://pandas.pydata.org/docs/user_guide/timeseries.html as a reference." ] }, { @@ -14,10 +21,10 @@ "id": "5ece8277-dc52-40f3-913f-1a3145df6bdc", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:35.535408Z", - "iopub.status.busy": "2024-08-28T17:27:35.535052Z", - "iopub.status.idle": "2024-08-28T17:27:36.951326Z", - "shell.execute_reply": "2024-08-28T17:27:36.950877Z" + "iopub.execute_input": "2024-08-29T21:12:38.130378Z", + "iopub.status.busy": "2024-08-29T21:12:38.130058Z", + "iopub.status.idle": "2024-08-29T21:12:39.788669Z", + "shell.execute_reply": "2024-08-29T21:12:39.787288Z" } }, "outputs": [], @@ -41,10 +48,10 @@ "id": "c127fb50-c570-46fb-a074-6e8eb3ede058", "metadata": { "execution": { - "iopub.execute_input": "2024-08-28T17:27:36.953723Z", - "iopub.status.busy": "2024-08-28T17:27:36.953532Z", - "iopub.status.idle": "2024-08-28T17:27:36.955730Z", - "shell.execute_reply": "2024-08-28T17:27:36.955323Z" + "iopub.execute_input": "2024-08-29T21:12:39.801080Z", + "iopub.status.busy": "2024-08-29T21:12:39.800293Z", + "iopub.status.idle": "2024-08-29T21:12:39.804335Z", + "shell.execute_reply": "2024-08-29T21:12:39.803709Z" } }, "outputs": [], @@ -56,9 +63,15 @@ { "cell_type": "markdown", "id": "02d7afa0-0224-4033-b2c7-465b67642201", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Parsing time series information from various sources and formats" + "##### Parsing time series information from various sources and formats" ] }, { @@ -66,22 +79,19 @@ "execution_count": 3, "id": "8d5f4a0a-fe5c-4a94-94ba-f16d258f92a6", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:36.958090Z", - "iopub.status.busy": "2024-08-28T17:27:36.957962Z", - "iopub.status.idle": "2024-08-28T17:27:37.441073Z", - "shell.execute_reply": "2024-08-28T17:27:37.440761Z" - } + "iopub.execute_input": "2024-08-29T21:12:39.808203Z", + "iopub.status.busy": "2024-08-29T21:12:39.807788Z", + "iopub.status.idle": "2024-08-29T21:12:40.958046Z", + "shell.execute_reply": "2024-08-29T21:12:40.957741Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "`to_datetime` implementation has mismatches with pandas:\n", - "Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. 
In this case Snowflake's auto format may yield different result values compared to pandas..\n" - ] - }, { "data": { "text/plain": [ @@ -103,9 +113,15 @@ { "cell_type": "markdown", "id": "1df6a98f-a79b-4e90-b314-0cdf4f94fbfd", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Generate sequences of fixed-frequency dates and time spans" + "##### Generate sequences of fixed-frequency dates and time spans" ] }, { @@ -113,12 +129,17 @@ "execution_count": 4, "id": "28d01637-1093-43ea-a791-bc167243530e", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.443821Z", - "iopub.status.busy": "2024-08-28T17:27:37.443686Z", - "iopub.status.idle": "2024-08-28T17:27:37.973506Z", - "shell.execute_reply": "2024-08-28T17:27:37.973040Z" - } + "iopub.execute_input": "2024-08-29T21:12:40.960205Z", + "iopub.status.busy": "2024-08-29T21:12:40.959968Z", + "iopub.status.idle": "2024-08-29T21:12:42.085992Z", + "shell.execute_reply": "2024-08-29T21:12:42.084980Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -142,9 +163,15 @@ { "cell_type": "markdown", "id": "727ff6dd-9af1-4abd-a276-6042bf3b6878", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Manipulating and converting date times with timezone information" + "##### Manipulating and converting date times with timezone information" ] }, { @@ -152,17 +179,42 @@ "execution_count": 5, "id": "1f2c79bc-2a9e-41f0-ab36-44fcc20d119a", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.976832Z", - "iopub.status.busy": "2024-08-28T17:27:37.976629Z", - "iopub.status.idle": "2024-08-28T17:27:37.978940Z", - "shell.execute_reply": "2024-08-28T17:27:37.978566Z" - } + "iopub.execute_input": "2024-08-29T21:12:42.091191Z", + "iopub.status.busy": "2024-08-29T21:12:42.090881Z", + "iopub.status.idle": "2024-08-29T21:12:42.289597Z", + "shell.execute_reply": "2024-08-29T21:12:42.289228Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] }, - "outputs": [], + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DatetimeIndex.tz_localize", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# TODO SNOW-783178\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m dti \u001b[38;5;241m=\u001b[39m \u001b[43mdti\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz_localize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUTC\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in 
https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DatetimeIndex.tz_localize" + ] + } + ], "source": [ - "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", - "#dti = dti.tz_localize(\"UTC\")" + "# TODO SNOW-783178\n", + "dti = dti.tz_localize(\"UTC\")" ] }, { @@ -170,25 +222,55 @@ "execution_count": 6, "id": "d7916dfa-9716-47e4-92a8-c3c852a3d802", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.981235Z", - "iopub.status.busy": "2024-08-28T17:27:37.981056Z", - "iopub.status.idle": "2024-08-28T17:27:37.983005Z", - "shell.execute_reply": "2024-08-28T17:27:37.982681Z" - } + "iopub.execute_input": "2024-08-29T21:12:42.295088Z", + "iopub.status.busy": "2024-08-29T21:12:42.294902Z", + "iopub.status.idle": "2024-08-29T21:12:42.321172Z", + "shell.execute_reply": "2024-08-29T21:12:42.320846Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] }, - "outputs": [], + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DatetimeIndex.tz_convert", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[6], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# TODO SNOW-1559264\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m \u001b[43mdti\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz_convert\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUS/Pacific\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DatetimeIndex.tz_convert" + ] + } + ], "source": [ - "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", - "#dti.tz_convert(\"US/Pacific\")" + "# TODO SNOW-1559264\n", + "dti.tz_convert(\"US/Pacific\")" ] }, { "cell_type": "markdown", "id": "ed3c83b4-e048-4dbb-8d94-25cb0ee62e66", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Resampling or converting a time series to a particular frequency" + "##### Resampling or converting a time series to a particular frequency" ] }, { @@ -196,12 +278,17 @@ "execution_count": 7, "id": "5aa8cd79-521b-42ee-a3a6-66be36603bcb", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:37.985074Z", - "iopub.status.busy": "2024-08-28T17:27:37.984931Z", - "iopub.status.idle": "2024-08-28T17:27:39.127895Z", - "shell.execute_reply": "2024-08-28T17:27:39.127293Z" - } + "iopub.execute_input": "2024-08-29T21:12:42.326598Z", + "iopub.status.busy": "2024-08-29T21:12:42.326454Z", + "iopub.status.idle": "2024-08-29T21:12:44.537136Z", + "shell.execute_reply": "2024-08-29T21:12:44.536402Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -231,12 +318,17 @@ "execution_count": 8, "id": "796c954c-7f60-441b-b85e-1098824fae4b", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.131539Z", - "iopub.status.busy": "2024-08-28T17:27:39.131194Z", - "iopub.status.idle": "2024-08-28T17:27:39.966782Z", - "shell.execute_reply": "2024-08-28T17:27:39.966293Z" - } + "iopub.execute_input": "2024-08-29T21:12:44.542723Z", + "iopub.status.busy": "2024-08-29T21:12:44.542354Z", + "iopub.status.idle": "2024-08-29T21:12:46.752298Z", + "shell.execute_reply": "2024-08-29T21:12:46.751393Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -260,9 +352,15 @@ { "cell_type": "markdown", "id": "e873f69d-e8b5-423f-9f0e-b019d37e15df", - "metadata": {}, + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Performing date and time arithmetic with absolute or relative time increments" + "##### Performing date and time arithmetic with absolute or relative time increments" ] }, { @@ -270,12 +368,17 @@ "execution_count": 9, "id": "e7272da8-eae8-4e31-8a61-2f442a6780e0", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.969410Z", - "iopub.status.busy": "2024-08-28T17:27:39.969229Z", - "iopub.status.idle": "2024-08-28T17:27:39.972019Z", - "shell.execute_reply": "2024-08-28T17:27:39.971689Z" - } + "iopub.execute_input": "2024-08-29T21:12:46.757227Z", + "iopub.status.busy": "2024-08-29T21:12:46.756719Z", + "iopub.status.idle": "2024-08-29T21:12:46.761430Z", + "shell.execute_reply": "2024-08-29T21:12:46.760778Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -299,12 +402,17 @@ "execution_count": 10, "id": "b69cb16a-9fc7-46fe-a6f6-a6a1ce635dc5", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.974318Z", - "iopub.status.busy": "2024-08-28T17:27:39.974172Z", - "iopub.status.idle": "2024-08-28T17:27:39.976711Z", - "shell.execute_reply": "2024-08-28T17:27:39.976411Z" - } + "iopub.execute_input": 
"2024-08-29T21:12:46.764942Z", + "iopub.status.busy": "2024-08-29T21:12:46.764670Z", + "iopub.status.idle": "2024-08-29T21:12:46.769167Z", + "shell.execute_reply": "2024-08-29T21:12:46.768635Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -319,6 +427,7 @@ } ], "source": [ + "# Add 1 day\n", "saturday = friday + pd.Timedelta(\"1 day\")\n", "saturday.day_name()" ] @@ -328,12 +437,17 @@ "execution_count": 11, "id": "064f4271-9485-497d-b176-b39d4f75248c", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.979219Z", - "iopub.status.busy": "2024-08-28T17:27:39.979068Z", - "iopub.status.idle": "2024-08-28T17:27:39.981624Z", - "shell.execute_reply": "2024-08-28T17:27:39.981354Z" - } + "iopub.execute_input": "2024-08-29T21:12:46.773042Z", + "iopub.status.busy": "2024-08-29T21:12:46.772797Z", + "iopub.status.idle": "2024-08-29T21:12:46.776716Z", + "shell.execute_reply": "2024-08-29T21:12:46.776160Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -348,6 +462,7 @@ } ], "source": [ + "# Add 1 business day (Friday --> Monday)\n", "monday = friday + pd.offsets.BDay()\n", "monday.day_name()" ] @@ -357,12 +472,17 @@ "execution_count": 12, "id": "86bf9469-b7d3-44fc-900e-cfd67a065842", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:39.983587Z", - "iopub.status.busy": "2024-08-28T17:27:39.983443Z", - "iopub.status.idle": "2024-08-28T17:27:40.913877Z", - "shell.execute_reply": "2024-08-28T17:27:40.913560Z" - } + "iopub.execute_input": "2024-08-29T21:12:46.779514Z", + "iopub.status.busy": "2024-08-29T21:12:46.779333Z", + "iopub.status.idle": "2024-08-29T21:12:49.324239Z", + "shell.execute_reply": "2024-08-29T21:12:49.319893Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { @@ -388,39 +508,57 @@ }, { "cell_type": "markdown", - "id": "1a42453c-bea7-470a-be57-650f42fea9a5", - "metadata": {}, + "id": "922016ad-e1a7-4d42-a250-05f7ed7894d3", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### Time Series-related instance methods" + "# Overview" ] }, { "cell_type": "markdown", - "id": "86658f09-80b8-42a5-a108-efc4dddfdb09", - "metadata": {}, + "id": "dae7a72f-417b-471a-b353-5b8cab1b7585", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### From timestamps to epoch" + "##### For time series data, it’s conventional to represent the time component in the index of a Series or DataFrame so manipulations can be performed with respect to the time element." 
] }, { "cell_type": "code", "execution_count": 13, - "id": "0e4e6063-a60f-4a70-adca-2cb9b3b101f8", + "id": "e763faa7-7806-4d2d-8df7-46a7689af07f", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:40.916090Z", - "iopub.status.busy": "2024-08-28T17:27:40.915940Z", - "iopub.status.idle": "2024-08-28T17:27:41.359184Z", - "shell.execute_reply": "2024-08-28T17:27:41.358781Z" - } + "iopub.execute_input": "2024-08-29T21:12:49.331928Z", + "iopub.status.busy": "2024-08-29T21:12:49.331661Z", + "iopub.status.idle": "2024-08-29T21:12:50.839840Z", + "shell.execute_reply": "2024-08-29T21:12:50.838975Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',\n", - " '2012-10-10 18:15:05', '2012-10-11 18:15:05'],\n", - " dtype='datetime64[ns]', freq=None)" + "2000-01-01 0\n", + "2000-01-02 1\n", + "2000-01-03 2\n", + "Freq: None, dtype: int64" ] }, "execution_count": 13, @@ -429,206 +567,4746 @@ } ], "source": [ - "stamps = pd.date_range(\"2012-10-08 18:15:05\", periods=4, freq=\"D\")\n", - "stamps" + "pd.Series(range(3), index=pd.date_range(\"2000\", freq=\"D\", periods=3))" + ] + }, + { + "cell_type": "markdown", + "id": "61ec1d74-d3d2-4983-8497-2342c7462655", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### However, Series and DataFrame can directly also support the time component as data itself." ] }, { "cell_type": "code", "execution_count": 14, - "id": "f4a38e8b-abcb-49c6-839d-01e4215d7d7a", + "id": "585a116f-14e9-470e-a790-45b0a9de61fe", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.361646Z", - "iopub.status.busy": "2024-08-28T17:27:41.361457Z", - "iopub.status.idle": "2024-08-28T17:27:41.363485Z", - "shell.execute_reply": "2024-08-28T17:27:41.363091Z" - } + "iopub.execute_input": "2024-08-29T21:12:50.845071Z", + "iopub.status.busy": "2024-08-29T21:12:50.844702Z", + "iopub.status.idle": "2024-08-29T21:12:51.872836Z", + "shell.execute_reply": "2024-08-29T21:12:51.872201Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0 2000-01-01\n", + "1 2000-01-02\n", + "2 2000-01-03\n", + "dtype: datetime64[ns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", - "# (stamps - pd.Timestamp(\"1970-01-01\")) // pd.Timedelta(\"1s\")" + "pd.Series(pd.date_range(\"2000\", freq=\"D\", periods=3))" ] }, { "cell_type": "markdown", - "id": "9b5b10c8-e72f-4405-bdeb-1bced14c8edf", - "metadata": {}, + "id": "01df1208-2b68-4ccf-b0c3-461ddc08d8b1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, "source": [ - "### DateOffset objects" + "##### Series and DataFrame have extended data type support and functionality for datetime, timedelta and Period data when passed into those constructors. DateOffset data however will be stored as object data." 
] }, { "cell_type": "code", "execution_count": 15, - "id": "1febbd6a-1b57-4e6a-a48a-3eac565ad61d", + "id": "6710ed2a-053e-459c-8752-acdf07d5e362", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:51.876484Z", + "iopub.status.busy": "2024-08-29T21:12:51.876122Z", + "iopub.status.idle": "2024-08-29T21:12:52.416975Z", + "shell.execute_reply": "2024-08-29T21:12:52.416640Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "pandas type period[M] is not implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:256\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mPANDAS_TO_SNOWFLAKE_MAP\u001b[49m\u001b[43m[\u001b[49m\u001b[43mp\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", + "\u001b[0;31mKeyError\u001b[0m: period[M]", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[15], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mperiod_range\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m1/1/2011\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mM\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:156\u001b[0m, in \u001b[0;36mSeries.__init__\u001b[0;34m(self, data, index, dtype, name, copy, fastpath, query_compiler)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28misinstance\u001b[39m(data, (pandas\u001b[38;5;241m.\u001b[39mSeries, pandas\u001b[38;5;241m.\u001b[39mIndex, pd\u001b[38;5;241m.\u001b[39mIndex))\n\u001b[1;32m 152\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m data\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 153\u001b[0m ):\n\u001b[1;32m 154\u001b[0m name \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m--> 156\u001b[0m query_compiler \u001b[38;5;241m=\u001b[39m \u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m_query_compiler\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_query_compiler \u001b[38;5;241m=\u001b[39m query_compiler\u001b[38;5;241m.\u001b[39mcolumnarize()\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/utils.py:104\u001b[0m, in \u001b[0;36mfrom_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msnowflake\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msnowpark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataFrame\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(query_compiler\u001b[38;5;241m=\u001b[39m\u001b[43mFactoryDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py:132\u001b[0m, in \u001b[0;36mFactoryDispatcher.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;129m@_inherit_docstrings\u001b[39m(factories\u001b[38;5;241m.\u001b[39mBaseFactory\u001b[38;5;241m.\u001b[39m_from_pandas)\n\u001b[1;32m 131\u001b[0m 
\u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_factory\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py:172\u001b[0m, in \u001b[0;36mBaseFactory._from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;129m@doc\u001b[39m(\n\u001b[1;32m 166\u001b[0m _doc_io_method_template,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_from_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/io/snow_io.py:177\u001b[0m, in \u001b[0;36mPandasOnSnowflakeIO.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df: pandas\u001b[38;5;241m.\u001b[39mDataFrame):\n\u001b[1;32m 173\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"invoke construction from pandas DataFrame (io backup methods), df is a pandas.DataFrame living in main-memory\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m df: An existing (native) pandas DataFrame\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_compiler_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:584\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_pandas\u001b[0;34m(cls, df, *args, **kwargs)\u001b[0m\n\u001b[1;32m 579\u001b[0m current_df_data_column_snowflake_quoted_identifiers\u001b[38;5;241m.\u001b[39mappend(\n\u001b[1;32m 580\u001b[0m row_position_snowflake_quoted_identifier\n\u001b[1;32m 581\u001b[0m )\n\u001b[1;32m 583\u001b[0m \u001b[38;5;66;03m# create snowpark df\u001b[39;00m\n\u001b[0;32m--> 584\u001b[0m snowpark_pandas_types, snowpark_types \u001b[38;5;241m=\u001b[39m \u001b[43minfer_snowpark_types_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 585\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m create_ordered_dataframe_from_pandas(\n\u001b[1;32m 586\u001b[0m df,\n\u001b[1;32m 587\u001b[0m snowflake_quoted_identifiers\u001b[38;5;241m=\u001b[39mcurrent_df_data_column_snowflake_quoted_identifiers,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 592\u001b[0m row_position_snowflake_quoted_identifier\u001b[38;5;241m=\u001b[39mrow_position_snowflake_quoted_identifier,\n\u001b[1;32m 593\u001b[0m )\n\u001b[1;32m 595\u001b[0m \u001b[38;5;66;03m# construct the internal frame for the dataframe\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1051\u001b[0m, in \u001b[0;36minfer_snowpark_types_from_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 1049\u001b[0m snowpark_types \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _, column \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m-> 1051\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43minfer_series_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(snowflake_type, SnowparkPandasType):\n\u001b[1;32m 1053\u001b[0m snowpark_types\u001b[38;5;241m.\u001b[39mappend(snowflake_type\u001b[38;5;241m.\u001b[39msnowpark_type)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:199\u001b[0m, in \u001b[0;36minfer_series_type\u001b[0;34m(series)\u001b[0m\n\u001b[1;32m 197\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m VariantType()\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 199\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43mTypeMapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_snowflake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m snowflake_type\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:258\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PANDAS_TO_SNOWFLAKE_MAP[p]\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m--> 258\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not implemented\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: pandas type period[M] is not implemented" + ] + } + ], + "source": [ + "pd.Series(pd.period_range(\"1/1/2011\", freq=\"M\", periods=3))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2ce1f555-cb1d-46e6-a0ee-831a90dbd594", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.365775Z", - "iopub.status.busy": "2024-08-28T17:27:41.365599Z", - "iopub.status.idle": "2024-08-28T17:27:41.413954Z", - "shell.execute_reply": "2024-08-28T17:27:41.413683Z" + "iopub.execute_input": "2024-08-29T21:12:52.421484Z", + "iopub.status.busy": "2024-08-29T21:12:52.421364Z", + "iopub.status.idle": "2024-08-29T21:12:52.657344Z", + "shell.execute_reply": "2024-08-29T21:12:52.656976Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Object of type DateOffset is not JSON serializable", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDateOffset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDateOffset\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:156\u001b[0m, in \u001b[0;36mSeries.__init__\u001b[0;34m(self, data, index, dtype, name, copy, fastpath, query_compiler)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28misinstance\u001b[39m(data, (pandas\u001b[38;5;241m.\u001b[39mSeries, pandas\u001b[38;5;241m.\u001b[39mIndex, pd\u001b[38;5;241m.\u001b[39mIndex))\n\u001b[1;32m 152\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m data\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 153\u001b[0m ):\n\u001b[1;32m 154\u001b[0m name \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m--> 156\u001b[0m 
query_compiler \u001b[38;5;241m=\u001b[39m \u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m_query_compiler\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_query_compiler \u001b[38;5;241m=\u001b[39m query_compiler\u001b[38;5;241m.\u001b[39mcolumnarize()\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/utils.py:104\u001b[0m, in \u001b[0;36mfrom_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msnowflake\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msnowpark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataFrame\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m DataFrame(query_compiler\u001b[38;5;241m=\u001b[39m\u001b[43mFactoryDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py:132\u001b[0m, in \u001b[0;36mFactoryDispatcher.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m 
\u001b[38;5;129m@_inherit_docstrings\u001b[39m(factories\u001b[38;5;241m.\u001b[39mBaseFactory\u001b[38;5;241m.\u001b[39m_from_pandas)\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_factory\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py:172\u001b[0m, in \u001b[0;36mBaseFactory._from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;129m@doc\u001b[39m(\n\u001b[1;32m 166\u001b[0m _doc_io_method_template,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_from_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/io/snow_io.py:177\u001b[0m, in \u001b[0;36mPandasOnSnowflakeIO.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df: pandas\u001b[38;5;241m.\u001b[39mDataFrame):\n\u001b[1;32m 173\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"invoke construction from pandas DataFrame (io backup methods), df is a pandas.DataFrame living in main-memory\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m df: An existing (native) pandas DataFrame\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_compiler_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:585\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_pandas\u001b[0;34m(cls, df, *args, **kwargs)\u001b[0m\n\u001b[1;32m 583\u001b[0m \u001b[38;5;66;03m# create snowpark df\u001b[39;00m\n\u001b[1;32m 584\u001b[0m snowpark_pandas_types, snowpark_types \u001b[38;5;241m=\u001b[39m infer_snowpark_types_from_pandas(df)\n\u001b[0;32m--> 585\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_ordered_dataframe_from_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 586\u001b[0m \u001b[43m \u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 587\u001b[0m \u001b[43m \u001b[49m\u001b[43msnowflake_quoted_identifiers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurrent_df_data_column_snowflake_quoted_identifiers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 588\u001b[0m \u001b[43m \u001b[49m\u001b[43msnowpark_types\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msnowpark_types\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 589\u001b[0m \u001b[43m \u001b[49m\u001b[43mordering_columns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 590\u001b[0m \u001b[43m \u001b[49m\u001b[43mOrderingColumn\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrow_position_snowflake_quoted_identifier\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 591\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 592\u001b[0m \u001b[43m \u001b[49m\u001b[43mrow_position_snowflake_quoted_identifier\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrow_position_snowflake_quoted_identifier\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 593\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 595\u001b[0m \u001b[38;5;66;03m# construct the internal frame for the dataframe\u001b[39;00m\n\u001b[1;32m 596\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mcls\u001b[39m(\n\u001b[1;32m 597\u001b[0m InternalFrame\u001b[38;5;241m.\u001b[39mcreate(\n\u001b[1;32m 598\u001b[0m ordered_dataframe\u001b[38;5;241m=\u001b[39mordered_dataframe,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 617\u001b[0m )\n\u001b[1;32m 618\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1114\u001b[0m, in \u001b[0;36mcreate_ordered_dataframe_from_pandas\u001b[0;34m(df, snowflake_quoted_identifiers, snowpark_types, ordering_columns, row_position_snowflake_quoted_identifier)\u001b[0m\n\u001b[1;32m 1103\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m y \u001b[38;5;129;01min\u001b[39;00m 
\u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(data[x])):\n\u001b[1;32m 1104\u001b[0m data[x][y] \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1105\u001b[0m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1106\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m isna_data[x][y]\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1111\u001b[0m )\n\u001b[1;32m 1112\u001b[0m )\n\u001b[0;32m-> 1114\u001b[0m snowpark_df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcreate_dataframe\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1115\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1116\u001b[0m \u001b[43m \u001b[49m\u001b[43mschema\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStructType\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1117\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\n\u001b[1;32m 1118\u001b[0m \u001b[43m \u001b[49m\u001b[43mStructField\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumn_identifier\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdatatype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meach_datatype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1119\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mid\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43meach_datatype\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mzip\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1120\u001b[0m \u001b[43m \u001b[49m\u001b[43msnowflake_quoted_identifiers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msnowpark_types\u001b[49m\n\u001b[1;32m 1121\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1122\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 1123\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1124\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OrderedDataFrame(\n\u001b[1;32m 1126\u001b[0m DataFrameReference(snowpark_df, snowflake_quoted_identifiers),\n\u001b[1;32m 1127\u001b[0m projected_column_snowflake_quoted_identifiers\u001b[38;5;241m=\u001b[39msnowflake_quoted_identifiers,\n\u001b[1;32m 1128\u001b[0m ordering_columns\u001b[38;5;241m=\u001b[39mordering_columns,\n\u001b[1;32m 1129\u001b[0m row_position_snowflake_quoted_identifier\u001b[38;5;241m=\u001b[39mrow_position_snowflake_quoted_identifier,\n\u001b[1;32m 1130\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/session.py:2807\u001b[0m, in \u001b[0;36mSession.create_dataframe\u001b[0;34m(self, data, schema)\u001b[0m\n\u001b[1;32m 2805\u001b[0m converted_row\u001b[38;5;241m.\u001b[39mappend(json\u001b[38;5;241m.\u001b[39mdumps(value, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m=\u001b[39mPythonObjJSONEncoder))\n\u001b[1;32m 2806\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_type, VariantType):\n\u001b[0;32m-> 2807\u001b[0m converted_row\u001b[38;5;241m.\u001b[39mappend(\u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdumps\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mPythonObjJSONEncoder\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 2808\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_type, GeographyType):\n\u001b[1;32m 2809\u001b[0m converted_row\u001b[38;5;241m.\u001b[39mappend(value)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/__init__.py:234\u001b[0m, in \u001b[0;36mdumps\u001b[0;34m(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 233\u001b[0m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONEncoder\n\u001b[0;32m--> 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43mskipkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mskipkeys\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mensure_ascii\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mensure_ascii\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_circular\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_circular\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mallow_nan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mallow_nan\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mseparators\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mseparators\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdefault\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msort_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/encoder.py:199\u001b[0m, in \u001b[0;36mJSONEncoder.encode\u001b[0;34m(self, o)\u001b[0m\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m encode_basestring(o)\n\u001b[1;32m 196\u001b[0m \u001b[38;5;66;03m# This doesn't pass the iterator directly to ''.join() because the\u001b[39;00m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;66;03m# exceptions aren't as detailed. 
The list call should be roughly\u001b[39;00m\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# equivalent to the PySequence_Fast that ''.join() would do.\u001b[39;00m\n\u001b[0;32m--> 199\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43miterencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_one_shot\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(chunks, (\u001b[38;5;28mlist\u001b[39m, \u001b[38;5;28mtuple\u001b[39m)):\n\u001b[1;32m 201\u001b[0m chunks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlist\u001b[39m(chunks)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/encoder.py:257\u001b[0m, in \u001b[0;36mJSONEncoder.iterencode\u001b[0;34m(self, o, _one_shot)\u001b[0m\n\u001b[1;32m 252\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 253\u001b[0m _iterencode \u001b[38;5;241m=\u001b[39m _make_iterencode(\n\u001b[1;32m 254\u001b[0m markers, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdefault, _encoder, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindent, floatstr,\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkey_separator, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mitem_separator, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msort_keys,\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mskipkeys, _one_shot)\n\u001b[0;32m--> 257\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_iterencode\u001b[49m\u001b[43m(\u001b[49m\u001b[43mo\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/utils.py:651\u001b[0m, in \u001b[0;36mPythonObjJSONEncoder.default\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 649\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m value\u001b[38;5;241m.\u001b[39mtolist()\n\u001b[1;32m 650\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 651\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdefault\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/json/encoder.py:179\u001b[0m, in \u001b[0;36mJSONEncoder.default\u001b[0;34m(self, o)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdefault\u001b[39m(\u001b[38;5;28mself\u001b[39m, o):\n\u001b[1;32m 161\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Implement this method in a subclass such that it returns\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;124;03m a serializable object for ``o``, or calls the base implementation\u001b[39;00m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;124;03m (to raise a ``TypeError``).\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 177\u001b[0m \n\u001b[1;32m 178\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 179\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mObject of type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mo\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mis not JSON serializable\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", + "\u001b[0;31mTypeError\u001b[0m: Object of type DateOffset is not JSON serializable" + ] } + ], + "source": [ + "pd.Series([pd.DateOffset(1), pd.DateOffset(2)])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "a6befac8-06ca-45c2-ade8-03736ac7f7ed", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:52.663945Z", + "iopub.status.busy": "2024-08-29T21:12:52.663811Z", + "iopub.status.idle": "2024-08-29T21:12:53.689964Z", + "shell.execute_reply": "2024-08-29T21:12:53.689137Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')" + "0 2011-01-31\n", + "1 2011-02-28\n", + "2 2011-03-31\n", + "dtype: datetime64[ns]" ] }, - "execution_count": 15, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ts = pd.Timestamp(\"2016-10-30 00:00:00\", tz=\"Europe/Helsinki\")\n", - "\n", - "ts + pd.Timedelta(days=1)" + "pd.Series(pd.date_range(\"1/1/2011\", freq=\"ME\", periods=3))" + ] + }, + { + "cell_type": "markdown", + "id": "a433d9b6-cfa8-4a1a-bc65-7253a0ab1fc1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Lastly, pandas represents null date times, time deltas, and time spans as NaT which is useful for representing missing or null date like values and behaves similar as np.nan does for float data." 
] }, { "cell_type": "code", - "execution_count": 16, - "id": "b8d03fb0-826f-4698-a6d0-f2b63f7d38dc", + "execution_count": 18, + "id": "cf9b85c2-c342-492a-8bef-d8dad541c8d2", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.415634Z", - "iopub.status.busy": "2024-08-28T17:27:41.415523Z", - "iopub.status.idle": "2024-08-28T17:27:41.417793Z", - "shell.execute_reply": "2024-08-28T17:27:41.417561Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.694436Z", + "iopub.status.busy": "2024-08-29T21:12:53.694067Z", + "iopub.status.idle": "2024-08-29T21:12:53.699310Z", + "shell.execute_reply": "2024-08-29T21:12:53.698757Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')" + "NaT" ] }, - "execution_count": 16, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "ts + pd.DateOffset(days=1)" + "pd.Timestamp(pd.NaT)" ] }, { - "cell_type": "markdown", - "id": "00d1f5cf-c073-4c60-949b-404953b80000", - "metadata": {}, + "cell_type": "code", + "execution_count": 19, + "id": "28c9907e-eaef-44c1-8a96-2e4a8b0451dc", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:53.704104Z", + "iopub.status.busy": "2024-08-29T21:12:53.703792Z", + "iopub.status.idle": "2024-08-29T21:12:53.707678Z", + "shell.execute_reply": "2024-08-29T21:12:53.707062Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "NaT" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "### Timestamp Binary Operations" + "pd.Timedelta(pd.NaT)" ] }, { "cell_type": "code", - "execution_count": 17, - "id": "dd818a8d-97c1-46f3-b29f-499ba92f22ae", + "execution_count": 20, + "id": "7ba5293a-b234-4a4a-a276-f5b9ebc6512c", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:41.419736Z", - "iopub.status.busy": "2024-08-28T17:27:41.419620Z", - "iopub.status.idle": "2024-08-28T17:27:42.097520Z", - "shell.execute_reply": "2024-08-28T17:27:42.097100Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.711155Z", + "iopub.status.busy": "2024-08-29T21:12:53.710940Z", + "iopub.status.idle": "2024-08-29T21:12:53.714655Z", + "shell.execute_reply": "2024-08-29T21:12:53.713901Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timedelta('396 days 03:00:00')" + "NaT" ] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.to_datetime('2018-10-26 12:00:00') - pd.to_datetime('2017-09-25 09:00:00')" + "pd.Period(pd.NaT)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "7c9a87d2-7883-46a6-8433-dfa5900ca9b0", + "execution_count": 21, + "id": "41ff9cef-5957-45ac-b9d6-994ffb813cf3", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:42.099835Z", - "iopub.status.busy": "2024-08-28T17:27:42.099657Z", - "iopub.status.idle": "2024-08-28T17:27:42.102502Z", - "shell.execute_reply": "2024-08-28T17:27:42.102144Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.718345Z", + "iopub.status.busy": "2024-08-29T21:12:53.718130Z", + "iopub.status.idle": "2024-08-29T21:12:53.721214Z", + "shell.execute_reply": "2024-08-29T21:12:53.720794Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { 
"data": { "text/plain": [ - "Timedelta('6 days 07:00:00')" + "False" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.Timestamp(\"2014-08-01 10:00\") - pd.Timestamp(\"2014-07-26 03:00\")" + "# Equality acts as np.nan would\n", + "pd.NaT == pd.NaT" + ] + }, + { + "cell_type": "markdown", + "id": "55cdacb3-46ca-465e-869a-f8887852e401", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Timestamps vs. time spans" + ] + }, + { + "cell_type": "markdown", + "id": "52c2704d-375b-42ff-9b9c-e48883d3b43f", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Timestamp and Period can serve as an index. Lists of Timestamp and Period are automatically coerced to DatetimeIndex and PeriodIndex respectively." ] }, { "cell_type": "code", - "execution_count": 19, - "id": "e78454b1-0d4c-42bc-a127-b21a4a7f09cf", + "execution_count": 22, + "id": "2b248408-76d5-4d95-9dc5-164ba9532309", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:42.104922Z", - "iopub.status.busy": "2024-08-28T17:27:42.104781Z", - "iopub.status.idle": "2024-08-28T17:27:42.107600Z", - "shell.execute_reply": "2024-08-28T17:27:42.107293Z" - } + "iopub.execute_input": "2024-08-29T21:12:53.724462Z", + "iopub.status.busy": "2024-08-29T21:12:53.724294Z", + "iopub.status.idle": "2024-08-29T21:12:54.022295Z", + "shell.execute_reply": "2024-08-29T21:12:54.020998Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timedelta('682 days 03:00:00')" + "snowflake.snowpark.modin.plugin.extensions.datetime_index.DatetimeIndex" ] }, - "execution_count": 19, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.Timestamp(year=2017, month=1, day=1, hour=12) - pd.Timestamp(year=2015, month=2, day=19, hour=9)" + "dates = [\n", + " pd.Timestamp(\"2012-05-01\"),\n", + " pd.Timestamp(\"2012-05-02\"),\n", + " pd.Timestamp(\"2012-05-03\"),\n", + "]\n", + "\n", + "\n", + "ts = pd.Series(np.random.randn(3), dates)\n", + "\n", + "type(ts.index)" ] }, { "cell_type": "code", - "execution_count": 20, - "id": "2534d141-1862-4901-ba70-7ed73ab9abdd", + "execution_count": 23, + "id": "c7cca402-6c5f-475a-aab7-77957f656690", "metadata": { + "editable": true, "execution": { - "iopub.execute_input": "2024-08-28T17:27:42.109761Z", - "iopub.status.busy": "2024-08-28T17:27:42.109628Z", - "iopub.status.idle": "2024-08-28T17:27:42.763799Z", - "shell.execute_reply": "2024-08-28T17:27:42.763158Z" - } + "iopub.execute_input": "2024-08-29T21:12:54.033215Z", + "iopub.status.busy": "2024-08-29T21:12:54.032640Z", + "iopub.status.idle": "2024-08-29T21:12:54.427276Z", + "shell.execute_reply": "2024-08-29T21:12:54.426196Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "Timedelta('-31 days +03:09:02')" + "DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)" ] }, - "execution_count": 20, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts.index" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "b88e7085-4cfd-456d-8422-f440ba02ed27", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:54.432281Z", + "iopub.status.busy": 
"2024-08-29T21:12:54.431935Z", + "iopub.status.idle": "2024-08-29T21:12:54.880207Z", + "shell.execute_reply": "2024-08-29T21:12:54.879027Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2012-05-01 0.806339\n", + "2012-05-02 0.151004\n", + "2012-05-03 0.198380\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "126e9c1c-0006-44a7-a05d-a8ccb0aca029", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:54.886147Z", + "iopub.status.busy": "2024-08-29T21:12:54.885761Z", + "iopub.status.idle": "2024-08-29T21:12:54.952853Z", + "shell.execute_reply": "2024-08-29T21:12:54.952527Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "pandas type period[M] is not implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:256\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 255\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mPANDAS_TO_SNOWFLAKE_MAP\u001b[49m\u001b[43m[\u001b[49m\u001b[43mp\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n", + "\u001b[0;31mKeyError\u001b[0m: period[M]", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[25], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m periods \u001b[38;5;241m=\u001b[39m [pd\u001b[38;5;241m.\u001b[39mPeriod(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01\u001b[39m\u001b[38;5;124m\"\u001b[39m), pd\u001b[38;5;241m.\u001b[39mPeriod(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-02\u001b[39m\u001b[38;5;124m\"\u001b[39m), pd\u001b[38;5;241m.\u001b[39mPeriod(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-03\u001b[39m\u001b[38;5;124m\"\u001b[39m)]\n\u001b[0;32m----> 3\u001b[0m ts \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mtype\u001b[39m(ts\u001b[38;5;241m.\u001b[39mindex)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:156\u001b[0m, in \u001b[0;36mSeries.__init__\u001b[0;34m(self, data, index, dtype, name, copy, fastpath, query_compiler)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28misinstance\u001b[39m(data, 
(pandas\u001b[38;5;241m.\u001b[39mSeries, pandas\u001b[38;5;241m.\u001b[39mIndex, pd\u001b[38;5;241m.\u001b[39mIndex))\n\u001b[1;32m 152\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m data\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 153\u001b[0m ):\n\u001b[1;32m 154\u001b[0m name \u001b[38;5;241m=\u001b[39m data\u001b[38;5;241m.\u001b[39mname\n\u001b[0;32m--> 156\u001b[0m query_compiler \u001b[38;5;241m=\u001b[39m \u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 157\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 158\u001b[0m \u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mSeries\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 159\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtry_convert_index_to_native\u001b[49m\u001b[43m(\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 161\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 162\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 164\u001b[0m \u001b[43m \u001b[49m\u001b[43mfastpath\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfastpath\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 166\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 167\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m_query_compiler\n\u001b[1;32m 168\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_query_compiler \u001b[38;5;241m=\u001b[39m query_compiler\u001b[38;5;241m.\u001b[39mcolumnarize()\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/utils.py:104\u001b[0m, in \u001b[0;36mfrom_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 101\u001b[0m \u001b[38;5;66;03m# from modin.core.execution.dispatching.factories.dispatcher import FactoryDispatcher\u001b[39;00m\n\u001b[1;32m 102\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msnowflake\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msnowpark\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmodin\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DataFrame\n\u001b[0;32m--> 104\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m 
DataFrame(query_compiler\u001b[38;5;241m=\u001b[39m\u001b[43mFactoryDispatcher\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/dispatcher.py:132\u001b[0m, in \u001b[0;36mFactoryDispatcher.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 129\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 130\u001b[0m \u001b[38;5;129m@_inherit_docstrings\u001b[39m(factories\u001b[38;5;241m.\u001b[39mBaseFactory\u001b[38;5;241m.\u001b[39m_from_pandas)\n\u001b[1;32m 131\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 132\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_factory\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/core/execution/dispatching/factories/factories.py:172\u001b[0m, in \u001b[0;36mBaseFactory._from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;129m@doc\u001b[39m(\n\u001b[1;32m 166\u001b[0m _doc_io_method_template,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 170\u001b[0m )\n\u001b[1;32m 171\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_from_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df):\n\u001b[0;32m--> 172\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mio_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/io/snow_io.py:177\u001b[0m, in \u001b[0;36mPandasOnSnowflakeIO.from_pandas\u001b[0;34m(cls, df)\u001b[0m\n\u001b[1;32m 171\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 172\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_pandas\u001b[39m(\u001b[38;5;28mcls\u001b[39m, df: pandas\u001b[38;5;241m.\u001b[39mDataFrame):\n\u001b[1;32m 173\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"invoke construction from pandas DataFrame (io backup methods), df is a pandas.DataFrame living in main-memory\u001b[39;00m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m df: An existing (native) pandas DataFrame\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_compiler_cls\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:584\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_pandas\u001b[0;34m(cls, df, *args, **kwargs)\u001b[0m\n\u001b[1;32m 579\u001b[0m current_df_data_column_snowflake_quoted_identifiers\u001b[38;5;241m.\u001b[39mappend(\n\u001b[1;32m 580\u001b[0m row_position_snowflake_quoted_identifier\n\u001b[1;32m 581\u001b[0m )\n\u001b[1;32m 583\u001b[0m \u001b[38;5;66;03m# create snowpark df\u001b[39;00m\n\u001b[0;32m--> 584\u001b[0m snowpark_pandas_types, snowpark_types \u001b[38;5;241m=\u001b[39m \u001b[43minfer_snowpark_types_from_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 585\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m create_ordered_dataframe_from_pandas(\n\u001b[1;32m 586\u001b[0m df,\n\u001b[1;32m 587\u001b[0m snowflake_quoted_identifiers\u001b[38;5;241m=\u001b[39mcurrent_df_data_column_snowflake_quoted_identifiers,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 592\u001b[0m row_position_snowflake_quoted_identifier\u001b[38;5;241m=\u001b[39mrow_position_snowflake_quoted_identifier,\n\u001b[1;32m 593\u001b[0m )\n\u001b[1;32m 595\u001b[0m \u001b[38;5;66;03m# construct the internal frame for the dataframe\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1051\u001b[0m, in \u001b[0;36minfer_snowpark_types_from_pandas\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 1049\u001b[0m snowpark_types \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m _, column \u001b[38;5;129;01min\u001b[39;00m df\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m-> 1051\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43minfer_series_type\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumn\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(snowflake_type, SnowparkPandasType):\n\u001b[1;32m 1053\u001b[0m 
snowpark_types\u001b[38;5;241m.\u001b[39mappend(snowflake_type\u001b[38;5;241m.\u001b[39msnowpark_type)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:199\u001b[0m, in \u001b[0;36minfer_series_type\u001b[0;34m(series)\u001b[0m\n\u001b[1;32m 197\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m VariantType()\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 199\u001b[0m snowflake_type \u001b[38;5;241m=\u001b[39m \u001b[43mTypeMapper\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_snowflake\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_type\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 201\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m snowflake_type\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/type_utils.py:258\u001b[0m, in \u001b[0;36mTypeMapper.to_snowflake\u001b[0;34m(cls, p)\u001b[0m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m PANDAS_TO_SNOWFLAKE_MAP[p]\n\u001b[1;32m 257\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m:\n\u001b[0;32m--> 258\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpandas type \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not implemented\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: pandas type period[M] is not implemented" + ] + } + ], + "source": [ + "periods = [pd.Period(\"2012-01\"), pd.Period(\"2012-02\"), pd.Period(\"2012-03\")]\n", + "\n", + "ts = pd.Series(np.random.randn(3), periods)\n", + "\n", + "type(ts.index)" + ] + }, + { + "cell_type": "markdown", + "id": "f927521e-535f-4d26-8a4c-d8fda7319000", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Converting to timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "bad4e888-44cd-4c49-8c21-be7590a8ae3f", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:54.957350Z", + "iopub.status.busy": "2024-08-29T21:12:54.957223Z", + "iopub.status.idle": "2024-08-29T21:12:56.239436Z", + "shell.execute_reply": "2024-08-29T21:12:56.239161Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation may have mismatches with pandas:\n", + "Snowflake automatic format detection is used when a format is not provided.In this case Snowflake's auto format may yield different result values compared to pandas.See https://docs.snowflake.com/en/sql-reference/date-time-input-output#supported-formats-for-auto-detection for details.\n" + ] + }, + { + "ename": "SnowparkSQLException", + "evalue": "(1304): 01b6ae78-0d07-1a71-0002-990387e5d42b: 100035 (22007): Timestamp 'Jul 31, 2009' is not recognized", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mSnowparkSQLException\u001b[0m Traceback (most recent call last)", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/IPython/core/formatters.py:708\u001b[0m, in \u001b[0;36mPlainTextFormatter.__call__\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 701\u001b[0m stream \u001b[38;5;241m=\u001b[39m StringIO()\n\u001b[1;32m 702\u001b[0m printer \u001b[38;5;241m=\u001b[39m pretty\u001b[38;5;241m.\u001b[39mRepresentationPrinter(stream, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose,\n\u001b[1;32m 703\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_width, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnewline,\n\u001b[1;32m 704\u001b[0m max_seq_length\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmax_seq_length,\n\u001b[1;32m 705\u001b[0m singleton_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msingleton_printers,\n\u001b[1;32m 706\u001b[0m type_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtype_printers,\n\u001b[1;32m 707\u001b[0m deferred_pprinters\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdeferred_printers)\n\u001b[0;32m--> 708\u001b[0m \u001b[43mprinter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpretty\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 709\u001b[0m printer\u001b[38;5;241m.\u001b[39mflush()\n\u001b[1;32m 710\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m stream\u001b[38;5;241m.\u001b[39mgetvalue()\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/IPython/lib/pretty.py:410\u001b[0m, in \u001b[0;36mRepresentationPrinter.pretty\u001b[0;34m(self, obj)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m meth(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mobject\u001b[39m \\\n\u001b[1;32m 409\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__dict__\u001b[39m\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m__repr__\u001b[39m\u001b[38;5;124m'\u001b[39m)):\n\u001b[0;32m--> 410\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_repr_pprint\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcycle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _default_pprint(obj, \u001b[38;5;28mself\u001b[39m, cycle)\n\u001b[1;32m 413\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/IPython/lib/pretty.py:778\u001b[0m, in \u001b[0;36m_repr_pprint\u001b[0;34m(obj, p, cycle)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A pprint that just redirects to the normal repr function.\"\"\"\u001b[39;00m\n\u001b[1;32m 777\u001b[0m \u001b[38;5;66;03m# Find newlines and replace them with p.break_()\u001b[39;00m\n\u001b[0;32m--> 778\u001b[0m output \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mrepr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 779\u001b[0m lines \u001b[38;5;241m=\u001b[39m output\u001b[38;5;241m.\u001b[39msplitlines()\n\u001b[1;32m 780\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m p\u001b[38;5;241m.\u001b[39mgroup():\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m 
\u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:477\u001b[0m, in \u001b[0;36mSeries.__repr__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 470\u001b[0m num_rows \u001b[38;5;241m=\u001b[39m pandas\u001b[38;5;241m.\u001b[39mget_option(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisplay.max_rows\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m60\u001b[39m\n\u001b[1;32m 471\u001b[0m num_cols \u001b[38;5;241m=\u001b[39m pandas\u001b[38;5;241m.\u001b[39mget_option(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisplay.max_columns\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;241m20\u001b[39m\n\u001b[1;32m 473\u001b[0m (\n\u001b[1;32m 474\u001b[0m row_count,\n\u001b[1;32m 475\u001b[0m col_count,\n\u001b[1;32m 476\u001b[0m temp_df,\n\u001b[0;32m--> 477\u001b[0m ) \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query_compiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild_repr_df\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnum_rows\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnum_cols\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(temp_df, pandas\u001b[38;5;241m.\u001b[39mDataFrame) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m temp_df\u001b[38;5;241m.\u001b[39mempty:\n\u001b[1;32m 479\u001b[0m temp_df \u001b[38;5;241m=\u001b[39m temp_df\u001b[38;5;241m.\u001b[39miloc[:, \u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:11699\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.build_repr_df\u001b[0;34m(self, num_rows_to_display, num_cols_to_display, times_symbol)\u001b[0m\n\u001b[1;32m 11697\u001b[0m \u001b[38;5;66;03m# retrieve frame as pandas object\u001b[39;00m\n\u001b[1;32m 11698\u001b[0m new_qc \u001b[38;5;241m=\u001b[39m SnowflakeQueryCompiler(new_frame)\n\u001b[0;32m> 11699\u001b[0m pandas_frame \u001b[38;5;241m=\u001b[39m \u001b[43mnew_qc\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 11701\u001b[0m \u001b[38;5;66;03m# remove last column after first retrieving row count\u001b[39;00m\n\u001b[1;32m 11702\u001b[0m row_count \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;241m0\u001b[39m \u001b[38;5;241m==\u001b[39m \u001b[38;5;28mlen\u001b[39m(pandas_frame) \u001b[38;5;28;01melse\u001b[39;00m pandas_frame\u001b[38;5;241m.\u001b[39miat[\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, 
**kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:754\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.to_pandas\u001b[0;34m(self, statement_params, **kwargs)\u001b[0m\n\u001b[1;32m 737\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mto_pandas\u001b[39m(\n\u001b[1;32m 738\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 739\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[1;32m 740\u001b[0m statement_params: Optional[\u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mstr\u001b[39m]] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 741\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 742\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m native_pd\u001b[38;5;241m.\u001b[39mDataFrame:\n\u001b[1;32m 743\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 744\u001b[0m \u001b[38;5;124;03m Convert underlying query compilers data to ``pandas.DataFrame``.\u001b[39;00m\n\u001b[1;32m 745\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 752\u001b[0m \n\u001b[1;32m 753\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 754\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_modin_frame\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/frame.py:895\u001b[0m, in \u001b[0;36mInternalFrame.to_pandas\u001b[0;34m(self, statement_params, **kwargs)\u001b[0m\n\u001b[1;32m 880\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;124;03mConvert this InternalFrame to ``pandas.DataFrame``.\u001b[39;00m\n\u001b[1;32m 882\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[38;5;124;03m The InternalFrame converted to pandas.\u001b[39;00m\n\u001b[1;32m 889\u001b[0m 
\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 890\u001b[0m ordered_dataframe \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mordered_dataframe\u001b[38;5;241m.\u001b[39mselect(\n\u001b[1;32m 891\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex_column_snowflake_quoted_identifiers\n\u001b[1;32m 892\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdata_column_snowflake_quoted_identifiers\n\u001b[1;32m 893\u001b[0m )\n\u001b[0;32m--> 895\u001b[0m native_df \u001b[38;5;241m=\u001b[39m \u001b[43msnowpark_to_pandas_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 896\u001b[0m \u001b[43m \u001b[49m\u001b[43mordered_dataframe\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 897\u001b[0m \u001b[43m \u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 898\u001b[0m \u001b[43m \u001b[49m\u001b[43mcached_snowpark_pandas_types\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcached_index_column_snowpark_pandas_types\u001b[49m\n\u001b[1;32m 899\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcached_data_column_snowpark_pandas_types\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 900\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 901\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 903\u001b[0m \u001b[38;5;66;03m# to_pandas() does not preserve the index information and will just return a\u001b[39;00m\n\u001b[1;32m 904\u001b[0m \u001b[38;5;66;03m# RangeIndex. 
Therefore, we need to set the index column manually\u001b[39;00m\n\u001b[1;32m 905\u001b[0m native_df\u001b[38;5;241m.\u001b[39mset_index(\n\u001b[1;32m 906\u001b[0m [\n\u001b[1;32m 907\u001b[0m extract_pandas_label_from_snowflake_quoted_identifier(identifier)\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 910\u001b[0m inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 911\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/utils.py:1334\u001b[0m, in \u001b[0;36msnowpark_to_pandas_helper\u001b[0;34m(ordered_dataframe, cached_snowpark_pandas_types, statement_params, **kwargs)\u001b[0m\n\u001b[1;32m 1329\u001b[0m \u001b[38;5;66;03m# ensure that snowpark_df has unique identifiers, so the native pandas DataFrame object created here\u001b[39;00m\n\u001b[1;32m 1330\u001b[0m \u001b[38;5;66;03m# also does have unique column names which is a prerequisite for the post-processing logic following.\u001b[39;00m\n\u001b[1;32m 1331\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m is_duplicate_free(\n\u001b[1;32m 1332\u001b[0m column_identifiers \u001b[38;5;241m+\u001b[39m variant_type_typeof_identifiers\n\u001b[1;32m 1333\u001b[0m ), \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSnowpark DataFrame to convert must have unique column identifiers\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m-> 1334\u001b[0m pandas_df \u001b[38;5;241m=\u001b[39m \u001b[43mordered_dataframe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1336\u001b[0m \u001b[38;5;66;03m# Step 3: perform post-processing\u001b[39;00m\n\u001b[1;32m 1337\u001b[0m \u001b[38;5;66;03m# If the dataframe has no rows, do not perform this. 
Using the result of the `apply` on\u001b[39;00m\n\u001b[1;32m 1338\u001b[0m \u001b[38;5;66;03m# an empty frame would erroneously update the dtype of the column to be `float64` instead of `object`.\u001b[39;00m\n\u001b[1;32m 1339\u001b[0m \u001b[38;5;66;03m# TODO SNOW-982779: verify correctness of this behavior\u001b[39;00m\n\u001b[1;32m 1340\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m pandas_df\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m!=\u001b[39m \u001b[38;5;241m0\u001b[39m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/ordered_dataframe.py:1786\u001b[0m, in \u001b[0;36mOrderedDataFrame.to_pandas\u001b[0;34m(self, statement_params, block, **kwargs)\u001b[0m\n\u001b[1;32m 1784\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1785\u001b[0m statement_params\u001b[38;5;241m.\u001b[39mupdate(get_default_snowpark_pandas_statement_params())\n\u001b[0;32m-> 1786\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msnowpark_dataframe\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_pandas\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1787\u001b[0m \u001b[43m \u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstatement_params\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 1788\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/telemetry.py:153\u001b[0m, in \u001b[0;36mdf_collect_api_telemetry..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 150\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 151\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 152\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_session\u001b[38;5;241m.\u001b[39mquery_history() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 153\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 154\u001b[0m plan \u001b[38;5;241m=\u001b[39m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_select_statement \u001b[38;5;129;01mor\u001b[39;00m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m_plan\n\u001b[1;32m 155\u001b[0m api_calls \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 156\u001b[0m \u001b[38;5;241m*\u001b[39mplan\u001b[38;5;241m.\u001b[39mapi_calls,\n\u001b[1;32m 157\u001b[0m {TelemetryField\u001b[38;5;241m.\u001b[39mNAME\u001b[38;5;241m.\u001b[39mvalue: \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDataFrame.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m},\n\u001b[1;32m 158\u001b[0m 
]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/dataframe.py:806\u001b[0m, in \u001b[0;36mDataFrame.to_pandas\u001b[0;34m(self, statement_params, block, **kwargs)\u001b[0m\n\u001b[1;32m 787\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 788\u001b[0m \u001b[38;5;124;03mExecutes the query representing this DataFrame and returns the result as a\u001b[39;00m\n\u001b[1;32m 789\u001b[0m \u001b[38;5;124;03m`pandas DataFrame `_.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 803\u001b[0m \u001b[38;5;124;03m :func:`Session.sql` can only be a SELECT statement.\u001b[39;00m\n\u001b[1;32m 804\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 805\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m open_telemetry_context_manager(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mto_pandas, \u001b[38;5;28mself\u001b[39m):\n\u001b[0;32m--> 806\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_session\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_conn\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 807\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_plan\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 808\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_pandas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 809\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 810\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_AsyncResultType\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mPANDAS\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 811\u001b[0m \u001b[43m \u001b[49m\u001b[43m_statement_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcreate_or_update_statement_params_with_query_tag\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 812\u001b[0m \u001b[43m \u001b[49m\u001b[43mstatement_params\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_statement_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 813\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_session\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mquery_tag\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 814\u001b[0m \u001b[43m \u001b[49m\u001b[43mSKIP_LEVELS_TWO\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 815\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 816\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 817\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 819\u001b[0m \u001b[38;5;66;03m# if the returned result is not a pandas dataframe, raise Exception\u001b[39;00m\n\u001b[1;32m 820\u001b[0m \u001b[38;5;66;03m# this might happen when calling this method with non-select commands\u001b[39;00m\n\u001b[1;32m 821\u001b[0m \u001b[38;5;66;03m# e.g., session.sql(\"create ...\").to_pandas()\u001b[39;00m\n\u001b[1;32m 822\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m block:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:526\u001b[0m, in \u001b[0;36mServerConnection.execute\u001b[0;34m(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, **kwargs)\u001b[0m\n\u001b[1;32m 516\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 517\u001b[0m is_in_stored_procedure()\n\u001b[1;32m 518\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m block\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 521\u001b[0m )\n\u001b[1;32m 522\u001b[0m ): \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 523\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 524\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAsync query is not supported in stored procedure yet\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 525\u001b[0m )\n\u001b[0;32m--> 526\u001b[0m result_set, result_meta \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_result_set\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 527\u001b[0m \u001b[43m \u001b[49m\u001b[43mplan\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 528\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_pandas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 529\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_iter\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 530\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 531\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mblock\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 532\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 533\u001b[0m \u001b[43m \u001b[49m\u001b[43mlog_on_exception\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_on_exception\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 534\u001b[0m \u001b[43m \u001b[49m\u001b[43mcase_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcase_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 535\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 536\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m block:\n\u001b[1;32m 537\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m result_set\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:207\u001b[0m, in \u001b[0;36mSnowflakePlan.Decorator.wrap_exception..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 204\u001b[0m ne \u001b[38;5;241m=\u001b[39m SnowparkClientExceptionMessages\u001b[38;5;241m.\u001b[39mSQL_EXCEPTION_FROM_PROGRAMMING_ERROR(\n\u001b[1;32m 205\u001b[0m e\n\u001b[1;32m 206\u001b[0m )\n\u001b[0;32m--> 207\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ne\u001b[38;5;241m.\u001b[39mwith_traceback(tb) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/analyzer/snowflake_plan.py:138\u001b[0m, in 
\u001b[0;36mSnowflakePlan.Decorator.wrap_exception..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 138\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 139\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m snowflake\u001b[38;5;241m.\u001b[39mconnector\u001b[38;5;241m.\u001b[39merrors\u001b[38;5;241m.\u001b[39mProgrammingError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 140\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(e, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:630\u001b[0m, in \u001b[0;36mServerConnection.get_result_set\u001b[0;34m(self, plan, to_pandas, to_iter, block, data_type, log_on_exception, case_sensitive, ignore_results, **kwargs)\u001b[0m\n\u001b[1;32m 628\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m holder, id_ \u001b[38;5;129;01min\u001b[39;00m placeholders\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 629\u001b[0m final_query \u001b[38;5;241m=\u001b[39m final_query\u001b[38;5;241m.\u001b[39mreplace(holder, id_)\n\u001b[0;32m--> 630\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_query\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 631\u001b[0m \u001b[43m \u001b[49m\u001b[43mfinal_query\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 632\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_pandas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 633\u001b[0m \u001b[43m \u001b[49m\u001b[43mto_iter\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mi\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmain_queries\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m-\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 634\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_ddl_on_temp_object\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mis_ddl_on_temp_object\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 635\u001b[0m \u001b[43m \u001b[49m\u001b[43mblock\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mis_last\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 636\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 637\u001b[0m \u001b[43m \u001b[49m\u001b[43masync_job_plan\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mplan\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 638\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mlog_on_exception\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlog_on_exception\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 639\u001b[0m \u001b[43m \u001b[49m\u001b[43mcase_sensitive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcase_sensitive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 640\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mquery\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 641\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_results\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_results\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 642\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 643\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 644\u001b[0m placeholders[query\u001b[38;5;241m.\u001b[39mquery_id_place_holder] \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 645\u001b[0m result[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msfqid\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_last \u001b[38;5;28;01melse\u001b[39;00m result\u001b[38;5;241m.\u001b[39mquery_id\n\u001b[1;32m 646\u001b[0m )\n\u001b[1;32m 647\u001b[0m result_meta \u001b[38;5;241m=\u001b[39m get_new_description(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_cursor)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:125\u001b[0m, in \u001b[0;36mServerConnection._Decorator.wrap_exception..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SnowparkClientExceptionMessages\u001b[38;5;241m.\u001b[39mSERVER_SESSION_EXPIRED(\n\u001b[1;32m 122\u001b[0m ex\u001b[38;5;241m.\u001b[39mcause\n\u001b[1;32m 123\u001b[0m )\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:119\u001b[0m, in \u001b[0;36mServerConnection._Decorator.wrap_exception..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 117\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SnowparkClientExceptionMessages\u001b[38;5;241m.\u001b[39mSERVER_SESSION_HAS_BEEN_CLOSED()\n\u001b[1;32m 118\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 119\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m ReauthenticationRequest \u001b[38;5;28;01mas\u001b[39;00m ex:\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SnowparkClientExceptionMessages\u001b[38;5;241m.\u001b[39mSERVER_SESSION_EXPIRED(\n\u001b[1;32m 122\u001b[0m ex\u001b[38;5;241m.\u001b[39mcause\n\u001b[1;32m 123\u001b[0m )\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:433\u001b[0m, in \u001b[0;36mServerConnection.run_query\u001b[0;34m(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, num_statements, ignore_results, **kwargs)\u001b[0m\n\u001b[1;32m 431\u001b[0m query_id_log \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m [queryID: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mex\u001b[38;5;241m.\u001b[39msfqid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(ex, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msfqid\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 432\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to execute query\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquery_id_log\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquery\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mex\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 433\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ex\n\u001b[1;32m 435\u001b[0m \u001b[38;5;66;03m# fetch_pandas_all/batches() only works for SELECT statements\u001b[39;00m\n\u001b[1;32m 436\u001b[0m \u001b[38;5;66;03m# We call fetchall() if fetch_pandas_all/batches() fails,\u001b[39;00m\n\u001b[1;32m 437\u001b[0m \u001b[38;5;66;03m# because when the query plan has multiple queries, it will\u001b[39;00m\n\u001b[1;32m 438\u001b[0m \u001b[38;5;66;03m# have non-select statements, and it shouldn't fail if the user\u001b[39;00m\n\u001b[1;32m 439\u001b[0m \u001b[38;5;66;03m# calls to_pandas() to execute the query.\u001b[39;00m\n\u001b[1;32m 440\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m block:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:418\u001b[0m, in \u001b[0;36mServerConnection.run_query\u001b[0;34m(self, query, to_pandas, to_iter, is_ddl_on_temp_object, block, data_type, async_job_plan, log_on_exception, case_sensitive, params, num_statements, ignore_results, **kwargs)\u001b[0m\n\u001b[1;32m 416\u001b[0m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_statement_params\u001b[39m\u001b[38;5;124m\"\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSNOWPARK_SKIP_TXN_COMMIT_IN_DDL\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 417\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m block:\n\u001b[0;32m--> 418\u001b[0m results_cursor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_and_notify_query_listener\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m 
\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 421\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExecute query [queryID: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresults_cursor\u001b[38;5;241m.\u001b[39msfqid\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquery\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 422\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/_internal/server_connection.py:369\u001b[0m, in \u001b[0;36mServerConnection.execute_and_notify_query_listener\u001b[0;34m(self, query, **kwargs)\u001b[0m\n\u001b[1;32m 366\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mexecute_and_notify_query_listener\u001b[39m(\n\u001b[1;32m 367\u001b[0m \u001b[38;5;28mself\u001b[39m, query: \u001b[38;5;28mstr\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any\n\u001b[1;32m 368\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m SnowflakeCursor:\n\u001b[0;32m--> 369\u001b[0m results_cursor \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_cursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mnotify_query_listeners(\n\u001b[1;32m 371\u001b[0m QueryRecord(results_cursor\u001b[38;5;241m.\u001b[39msfqid, results_cursor\u001b[38;5;241m.\u001b[39mquery)\n\u001b[1;32m 372\u001b[0m )\n\u001b[1;32m 373\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results_cursor\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/connector/cursor.py:1080\u001b[0m, in \u001b[0;36mSnowflakeCursor.execute\u001b[0;34m(self, command, params, _bind_stage, timeout, _exec_async, _no_retry, _do_reset, _put_callback, _put_azure_callback, _put_callback_output_stream, _get_callback, _get_azure_callback, _get_callback_output_stream, _show_progress_bar, _statement_params, _is_internal, _describe_only, _no_results, _is_put_get, _raise_put_get_error, _force_put_overwrite, _skip_upload_on_content_match, file_stream, num_statements)\u001b[0m\n\u001b[1;32m 1076\u001b[0m is_integrity_error \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1077\u001b[0m code \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m100072\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1078\u001b[0m ) \u001b[38;5;66;03m# NULL result in a non-nullable column\u001b[39;00m\n\u001b[1;32m 1079\u001b[0m error_class \u001b[38;5;241m=\u001b[39m IntegrityError \u001b[38;5;28;01mif\u001b[39;00m is_integrity_error \u001b[38;5;28;01melse\u001b[39;00m ProgrammingError\n\u001b[0;32m-> 1080\u001b[0m \u001b[43mError\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merrorhandler_wrapper\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconnection\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror_class\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43merrvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1081\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/connector/errors.py:290\u001b[0m, in \u001b[0;36mError.errorhandler_wrapper\u001b[0;34m(connection, cursor, error_class, error_value)\u001b[0m\n\u001b[1;32m 267\u001b[0m \u001b[38;5;129m@staticmethod\u001b[39m\n\u001b[1;32m 268\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21merrorhandler_wrapper\u001b[39m(\n\u001b[1;32m 269\u001b[0m connection: SnowflakeConnection \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 272\u001b[0m error_value: \u001b[38;5;28mdict\u001b[39m[\u001b[38;5;28mstr\u001b[39m, Any],\n\u001b[1;32m 273\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 274\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Error handler wrapper that calls the errorhandler method.\u001b[39;00m\n\u001b[1;32m 275\u001b[0m \n\u001b[1;32m 276\u001b[0m \u001b[38;5;124;03m Args:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 287\u001b[0m \u001b[38;5;124;03m exception to the first handler in that order.\u001b[39;00m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 290\u001b[0m handed_over \u001b[38;5;241m=\u001b[39m \u001b[43mError\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mhand_to_other_handler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 291\u001b[0m \u001b[43m \u001b[49m\u001b[43mconnection\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 292\u001b[0m \u001b[43m \u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 293\u001b[0m \u001b[43m \u001b[49m\u001b[43merror_class\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 294\u001b[0m \u001b[43m \u001b[49m\u001b[43merror_value\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 295\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 296\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m handed_over:\n\u001b[1;32m 297\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Error\u001b[38;5;241m.\u001b[39merrorhandler_make_exception(\n\u001b[1;32m 298\u001b[0m error_class,\n\u001b[1;32m 299\u001b[0m error_value,\n\u001b[1;32m 300\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/connector/errors.py:345\u001b[0m, in \u001b[0;36mError.hand_to_other_handler\u001b[0;34m(connection, cursor, error_class, error_value)\u001b[0m\n\u001b[1;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cursor \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 344\u001b[0m cursor\u001b[38;5;241m.\u001b[39mmessages\u001b[38;5;241m.\u001b[39mappend((error_class, error_value))\n\u001b[0;32m--> 345\u001b[0m \u001b[43mcursor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merrorhandler\u001b[49m\u001b[43m(\u001b[49m\u001b[43mconnection\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcursor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror_class\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror_value\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 346\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 347\u001b[0m 
\u001b[38;5;28;01melif\u001b[39;00m connection \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/connector/errors.py:221\u001b[0m, in \u001b[0;36mError.default_errorhandler\u001b[0;34m(connection, cursor, error_class, error_value)\u001b[0m\n\u001b[1;32m 219\u001b[0m errno \u001b[38;5;241m=\u001b[39m error_value\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124merrno\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 220\u001b[0m done_format_msg \u001b[38;5;241m=\u001b[39m error_value\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdone_format_msg\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 221\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m error_class(\n\u001b[1;32m 222\u001b[0m msg\u001b[38;5;241m=\u001b[39merror_value\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmsg\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 223\u001b[0m errno\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m errno \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mint\u001b[39m(errno),\n\u001b[1;32m 224\u001b[0m sqlstate\u001b[38;5;241m=\u001b[39merror_value\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msqlstate\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 225\u001b[0m sfqid\u001b[38;5;241m=\u001b[39merror_value\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msfqid\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 226\u001b[0m query\u001b[38;5;241m=\u001b[39merror_value\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery\u001b[39m\u001b[38;5;124m\"\u001b[39m),\n\u001b[1;32m 227\u001b[0m done_format_msg\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 228\u001b[0m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01mif\u001b[39;00m done_format_msg \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mbool\u001b[39m(done_format_msg)\n\u001b[1;32m 229\u001b[0m ),\n\u001b[1;32m 230\u001b[0m connection\u001b[38;5;241m=\u001b[39mconnection,\n\u001b[1;32m 231\u001b[0m cursor\u001b[38;5;241m=\u001b[39mcursor,\n\u001b[1;32m 232\u001b[0m )\n", + "\u001b[0;31mSnowparkSQLException\u001b[0m: (1304): 01b6ae78-0d07-1a71-0002-990387e5d42b: 100035 (22007): Timestamp 'Jul 31, 2009' is not recognized" + ] + } + ], + "source": [ + "pd.to_datetime(pd.Series([\"Jul 31, 2009\", \"Jan 10, 2010\", None]))" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "bc0d555d-e2af-42fe-96af-024274ce001e", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:56.273155Z", + "iopub.status.busy": "2024-08-29T21:12:56.273001Z", + "iopub.status.idle": "2024-08-29T21:12:57.106508Z", + "shell.execute_reply": "2024-08-29T21:12:57.105822Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2005-11-23', '2010-12-31'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([\"2005/11/23\", \"2010/12/31\"])" + ] + }, + { + "cell_type": "markdown", + "id": 
"12126dc9-6fad-4667-a3ce-82ccb0ff54df", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### If you use dates which start with the day first (i.e. European style), you can pass the dayfirst flag:" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "88419653-e387-4cb7-a25a-428dccd767a3", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:57.112766Z", + "iopub.status.busy": "2024-08-29T21:12:57.112379Z", + "iopub.status.idle": "2024-08-29T21:12:57.808940Z", + "shell.execute_reply": "2024-08-29T21:12:57.802786Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-01-04 10:00:00'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([\"04-01-2012 10:00\"], dayfirst=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "e5f75293-3b7f-47ba-9348-9b71aa6ea774", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:57.827247Z", + "iopub.status.busy": "2024-08-29T21:12:57.826810Z", + "iopub.status.idle": "2024-08-29T21:12:58.529092Z", + "shell.execute_reply": "2024-08-29T21:12:58.528007Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "UserWarning: Parsing dates in %m-%d-%Y %H:%M format when dayfirst=True was specified. Pass `dayfirst=False` or specify a format to silence this warning.\n" + ] + }, + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-04-14 10:00:00'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([\"04-14-2012 10:00\"], dayfirst=True)" + ] + }, + { + "cell_type": "markdown", + "id": "901c62d1-5db8-47d7-b32a-f3e911f02036", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### You can also use the DatetimeIndex constructor directly:" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "5d8cfe2e-b991-4036-acdf-a6b4da02f1c0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:58.534583Z", + "iopub.status.busy": "2024-08-29T21:12:58.534156Z", + "iopub.status.idle": "2024-08-29T21:12:59.059427Z", + "shell.execute_reply": "2024-08-29T21:12:59.057882Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DatetimeIndex([\"2018-01-01\", \"2018-01-03\", \"2018-01-05\"])" + ] + }, + { + "cell_type": "markdown", + "id": "b8ebb2fc-173c-406a-ade8-d807b1073b2b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### The string ‘infer’ can be passed in order to set the frequency of the index as the inferred frequency upon creation:" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "f06dcd55-ebf6-4c33-b299-8c8f7fde94b3", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.066203Z", + 
"iopub.status.busy": "2024-08-29T21:12:59.065831Z", + "iopub.status.idle": "2024-08-29T21:12:59.601774Z", + "shell.execute_reply": "2024-08-29T21:12:59.600584Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01', '2018-01-03', '2018-01-05'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DatetimeIndex([\"2018-01-01\", \"2018-01-03\", \"2018-01-05\"], freq=\"infer\")" + ] + }, + { + "cell_type": "markdown", + "id": "82fd0285-3c12-4429-a10e-d505192fe2a1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Providing a format argument" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "bce3c8c4-bfed-432c-a6f1-1b18151843bc", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.608185Z", + "iopub.status.busy": "2024-08-29T21:12:59.607833Z", + "iopub.status.idle": "2024-08-29T21:12:59.613136Z", + "shell.execute_reply": "2024-08-29T21:12:59.612608Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2010-11-12 00:00:00')" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\"2010/11/12\", format=\"%Y/%m/%d\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "6adee443-0929-49a7-ab1f-d3f3a2d7d164", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.618039Z", + "iopub.status.busy": "2024-08-29T21:12:59.617739Z", + "iopub.status.idle": "2024-08-29T21:12:59.622386Z", + "shell.execute_reply": "2024-08-29T21:12:59.621865Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2010-11-12 00:00:00')" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\"12-11-2010 00:00\", format=\"%d-%m-%Y %H:%M\")" + ] + }, + { + "cell_type": "markdown", + "id": "4484371f-ef3a-4e7f-a16a-83dbb0a74741", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Assembling datetime from multiple DataFrame columns" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "00230009-1790-48ea-b58b-d68e162b1d8d", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:12:59.626836Z", + "iopub.status.busy": "2024-08-29T21:12:59.626477Z", + "iopub.status.idle": "2024-08-29T21:13:00.302444Z", + "shell.execute_reply": "2024-08-29T21:13:00.301244Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2015-02-04 02:00:00\n", + "1 2016-03-05 03:00:00\n", + "dtype: datetime64[ns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.DataFrame(\n", + " {\"year\": [2015, 2016], \"month\": [2, 3], \"day\": [4, 5], \"hour\": [2, 3]}\n", + ")\n", + "\n", + "\n", + "pd.to_datetime(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "d1481451-a63f-4f1d-8c58-b570ad099bca", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:00.313705Z", + "iopub.status.busy": 
"2024-08-29T21:13:00.313078Z", + "iopub.status.idle": "2024-08-29T21:13:00.856785Z", + "shell.execute_reply": "2024-08-29T21:13:00.856191Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2015-02-04\n", + "1 2016-03-05\n", + "dtype: datetime64[ns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(df[[\"year\", \"month\", \"day\"]])" + ] + }, + { + "cell_type": "markdown", + "id": "d0f4fd47-3a16-4b5a-99ca-ef698e8188d1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Invalid data" + ] + }, + { + "cell_type": "markdown", + "id": "273d5d66-590f-4d88-a3c6-2623827ed8f1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### The default behavior, errors='raise', is to raise when unparsable:" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "3a90e705-7028-4604-b1c8-ad37d9ad804c", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:00.866862Z", + "iopub.status.busy": "2024-08-29T21:13:00.866601Z", + "iopub.status.idle": "2024-08-29T21:13:01.222078Z", + "shell.execute_reply": "2024-08-29T21:13:01.221762Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "ValueError", + "evalue": "time data \"asd\" doesn't match format \"%Y/%m/%d\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. 
You might want to use `dayfirst` alongside this.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[36], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m2009/07/31\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43masd\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mraise\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m 
\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/general.py:1786\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1782\u001b[0m raise_if_native_pandas_objects(arg)\n\u001b[1;32m 1784\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(arg, (DataFrame, Series, pd\u001b[38;5;241m.\u001b[39mIndex)):\n\u001b[1;32m 1785\u001b[0m \u001b[38;5;66;03m# use pandas.to_datetime to convert local data to datetime\u001b[39;00m\n\u001b[0;32m-> 1786\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mpandas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_datetime\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1787\u001b[0m \u001b[43m \u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1788\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1789\u001b[0m \u001b[43m \u001b[49m\u001b[43mdayfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1790\u001b[0m \u001b[43m \u001b[49m\u001b[43myearfirst\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1791\u001b[0m \u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1792\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1793\u001b[0m \u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1794\u001b[0m \u001b[43m \u001b[49m\u001b[43munit\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1795\u001b[0m \u001b[43m \u001b[49m\u001b[43minfer_datetime_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1796\u001b[0m \u001b[43m \u001b[49m\u001b[43morigin\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1797\u001b[0m \u001b[43m \u001b[49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1798\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1799\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(res, pandas\u001b[38;5;241m.\u001b[39mSeries):\n\u001b[1;32m 1800\u001b[0m res \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mSeries(res)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:1099\u001b[0m, in \u001b[0;36mto_datetime\u001b[0;34m(arg, errors, dayfirst, yearfirst, utc, format, exact, unit, infer_datetime_format, origin, cache)\u001b[0m\n\u001b[1;32m 1097\u001b[0m result 
\u001b[38;5;241m=\u001b[39m _convert_and_box_cache(argc, cache_array)\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1099\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mconvert_listlike\u001b[49m\u001b[43m(\u001b[49m\u001b[43margc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1101\u001b[0m result \u001b[38;5;241m=\u001b[39m convert_listlike(np\u001b[38;5;241m.\u001b[39marray([arg]), \u001b[38;5;28mformat\u001b[39m)[\u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:433\u001b[0m, in \u001b[0;36m_convert_listlike_datetimes\u001b[0;34m(arg, format, name, utc, unit, errors, dayfirst, yearfirst, exact)\u001b[0m\n\u001b[1;32m 431\u001b[0m \u001b[38;5;66;03m# `format` could be inferred, or user didn't ask for mixed-format parsing.\u001b[39;00m\n\u001b[1;32m 432\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mformat\u001b[39m \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmixed\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 433\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_array_strptime_with_fallback\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 435\u001b[0m result, tz_parsed \u001b[38;5;241m=\u001b[39m objects_to_datetime64(\n\u001b[1;32m 436\u001b[0m arg,\n\u001b[1;32m 437\u001b[0m dayfirst\u001b[38;5;241m=\u001b[39mdayfirst,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 441\u001b[0m allow_object\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m,\n\u001b[1;32m 442\u001b[0m )\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_parsed \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 445\u001b[0m \u001b[38;5;66;03m# We can take a shortcut since the datetime64 numpy array\u001b[39;00m\n\u001b[1;32m 446\u001b[0m \u001b[38;5;66;03m# is in UTC\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/pandas/core/tools/datetimes.py:467\u001b[0m, in \u001b[0;36m_array_strptime_with_fallback\u001b[0;34m(arg, name, utc, fmt, exact, errors)\u001b[0m\n\u001b[1;32m 456\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_array_strptime_with_fallback\u001b[39m(\n\u001b[1;32m 457\u001b[0m arg,\n\u001b[1;32m 458\u001b[0m name,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 462\u001b[0m errors: \u001b[38;5;28mstr\u001b[39m,\n\u001b[1;32m 463\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Index:\n\u001b[1;32m 464\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 465\u001b[0m \u001b[38;5;124;03m Call array_strptime, with fallback behavior depending on 
'errors'.\u001b[39;00m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 467\u001b[0m result, tz_out \u001b[38;5;241m=\u001b[39m \u001b[43marray_strptime\u001b[49m\u001b[43m(\u001b[49m\u001b[43marg\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfmt\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexact\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexact\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mutc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mutc\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m tz_out \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 469\u001b[0m unit \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mdatetime_data(result\u001b[38;5;241m.\u001b[39mdtype)[\u001b[38;5;241m0\u001b[39m]\n", + "File \u001b[0;32mstrptime.pyx:501\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mstrptime.pyx:451\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime.array_strptime\u001b[0;34m()\u001b[0m\n", + "File \u001b[0;32mstrptime.pyx:583\u001b[0m, in \u001b[0;36mpandas._libs.tslibs.strptime._parse_with_format\u001b[0;34m()\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: time data \"asd\" doesn't match format \"%Y/%m/%d\", at position 1. You might want to try:\n - passing `format` if your strings have a consistent format;\n - passing `format='ISO8601'` if your strings are all ISO8601 but not necessarily in exactly the same format;\n - passing `format='mixed'`, and the format will be inferred for each element individually. You might want to use `dayfirst` alongside this." 
+ ] + } + ], + "source": [ + "pd.to_datetime(['2009/07/31', 'asd'], errors='raise')" + ] + }, + { + "cell_type": "markdown", + "id": "71a6317f-66eb-4689-a15f-61a03b2c93c4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Pass errors='coerce' to convert unparsable data to NaT (not a time):" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "c15076fb-8bf0-47b3-9178-e745b27b0eae", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:01.233855Z", + "iopub.status.busy": "2024-08-29T21:13:01.233738Z", + "iopub.status.idle": "2024-08-29T21:13:01.904070Z", + "shell.execute_reply": "2024-08-29T21:13:01.903200Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(['2009/07/31', 'asd'], errors='coerce')" + ] + }, + { + "cell_type": "markdown", + "id": "ea663c7c-13a2-458f-85cd-999116b6b7ed", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Epoch timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "018f285f-ad0a-454c-abe4-7ee1fcf7acfb", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:01.909194Z", + "iopub.status.busy": "2024-08-29T21:13:01.908859Z", + "iopub.status.idle": "2024-08-29T21:13:02.578918Z", + "shell.execute_reply": "2024-08-29T21:13:02.578486Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',\n", + " '2012-10-10 18:15:05', '2012-10-11 18:15:05',\n", + " '2012-10-12 18:15:05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\n", + " [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit=\"s\"\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7f15dd29-958a-4279-a14a-75851710cf65", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:02.581162Z", + "iopub.status.busy": "2024-08-29T21:13:02.580986Z", + "iopub.status.idle": "2024-08-29T21:13:03.340376Z", + "shell.execute_reply": "2024-08-29T21:13:03.339624Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['1970-01-16 14:55:20.105000', '1970-01-16 14:56:46.505000',\n", + " '1970-01-16 14:58:12.905000', '1970-01-16 14:59:39.305000',\n", + " '1970-01-16 15:01:05.705000'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime(\n", + " [1349720105, 1349806505, 1349892905, 1349979305, 1350065705], unit=\"ms\"\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "id": "f796ad57-6bce-4930-9217-5da5b9d37dc0", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Constructing a Timestamp or DatetimeIndex with an epoch timestamp with the tz argument specified will raise a ValueError. 
If you have epochs in wall time in another timezone, you can read the epochs as timezone-naive timestamps and then localize to the appropriate timezone:" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f565054c-8ecf-47f3-a601-181d1b66cb59", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:03.345596Z", + "iopub.status.busy": "2024-08-29T21:13:03.345274Z", + "iopub.status.idle": "2024-08-29T21:13:03.411334Z", + "shell.execute_reply": "2024-08-29T21:13:03.411013Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2010-01-01 12:00:00-0800', tz='US/Pacific')" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Timestamp(1262347200000000000).tz_localize(\"US/Pacific\")" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "06b3d9cc-9a51-4039-a9a7-84c66d4221a8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:03.413022Z", + "iopub.status.busy": "2024-08-29T21:13:03.412902Z", + "iopub.status.idle": "2024-08-29T21:13:03.588840Z", + "shell.execute_reply": "2024-08-29T21:13:03.588516Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DatetimeIndex.tz_localize", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[41], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDatetimeIndex\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m1262347200000000000\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtz_localize\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mUS/Pacific\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DatetimeIndex.tz_localize" + ] + } + ], + "source": [ + "pd.DatetimeIndex([1262347200000000000]).tz_localize(\"US/Pacific\")" + ] + }, + { + "cell_type": "markdown", + "id": "86658f09-80b8-42a5-a108-efc4dddfdb09", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### From timestamps to epoch" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "0e4e6063-a60f-4a70-adca-2cb9b3b101f8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:03.595180Z", + "iopub.status.busy": "2024-08-29T21:13:03.595035Z", + "iopub.status.idle": "2024-08-29T21:13:04.620415Z", + "shell.execute_reply": "2024-08-29T21:13:04.619552Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',\n", + " '2012-10-10 18:15:05', '2012-10-11 18:15:05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "stamps = pd.date_range(\"2012-10-08 18:15:05\", periods=4, freq=\"D\")\n", + "stamps" + ] + }, + { + "cell_type": "markdown", + "id": "720ebf04-b3d2-45c7-9393-3b064c3cf6d1", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "#### We subtract the epoch (midnight at January 1, 1970 UTC) and then floor divide by the “unit” (1 second)." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "f4a38e8b-abcb-49c6-839d-01e4215d7d7a", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:04.624393Z", + "iopub.status.busy": "2024-08-29T21:13:04.624099Z", + "iopub.status.idle": "2024-08-29T21:13:05.994009Z", + "shell.execute_reply": "2024-08-29T21:13:05.989798Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index([1349720105, 1349806505, 1349892905, 1349979305], dtype='int64')" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(stamps - pd.Timestamp(\"1970-01-01\")) // pd.Timedelta(\"1s\")" + ] + }, + { + "cell_type": "markdown", + "id": "09d2b860-df2d-4069-b4b0-2dd1a8697faf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Using the origin parameter" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "099f77b7-7a79-4dcd-bc37-552616cbb4c0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:06.006249Z", + "iopub.status.busy": "2024-08-29T21:13:06.004156Z", + "iopub.status.idle": "2024-08-29T21:13:06.696681Z", + "shell.execute_reply": "2024-08-29T21:13:06.695745Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([1, 2, 3], unit=\"D\", origin=pd.Timestamp(\"1960-01-01\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": 
"e1878fc8-b2f3-4c93-b118-6d185ea19ce5", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:06.701885Z", + "iopub.status.busy": "2024-08-29T21:13:06.701537Z", + "iopub.status.idle": "2024-08-29T21:13:07.424582Z", + "shell.execute_reply": "2024-08-29T21:13:07.422794Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['1970-01-02', '1970-01-03', '1970-01-04'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime([1, 2, 3], unit=\"D\")" + ] + }, + { + "cell_type": "markdown", + "id": "6415bc76-3599-4c37-ab86-2b841bac3b95", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Generating ranges of timestamps" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "e349e6b7-fdda-4879-b469-9719d2b74be0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:07.429375Z", + "iopub.status.busy": "2024-08-29T21:13:07.428977Z", + "iopub.status.idle": "2024-08-29T21:13:07.946936Z", + "shell.execute_reply": "2024-08-29T21:13:07.946186Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dates = [\n", + " datetime.datetime(2012, 5, 1),\n", + " datetime.datetime(2012, 5, 2),\n", + " datetime.datetime(2012, 5, 3),\n", + "]\n", + "\n", + "\n", + "# Note the frequency information\n", + "index = pd.DatetimeIndex(dates)\n", + "\n", + "index" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "7c293a9c-a307-4bf3-9414-e5db0abc5b55", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:07.952090Z", + "iopub.status.busy": "2024-08-29T21:13:07.951757Z", + "iopub.status.idle": "2024-08-29T21:13:08.656604Z", + "shell.execute_reply": "2024-08-29T21:13:08.655201Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-05-01', '2012-05-02', '2012-05-03'], dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Automatically converted to DatetimeIndex\n", + "index = pd.Index(dates)\n", + "\n", + "index" + ] + }, + { + "cell_type": "markdown", + "id": "864f9e1c-969d-489e-8e3e-aab762e4680a", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular frequency, we can use the date_range() and bdate_range() functions to create a DatetimeIndex. 
The default frequency for date_range is a calendar day while the default for bdate_range is a business day:" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "9576a971-d970-4fd0-95fe-46738536c6d3", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:08.663459Z", + "iopub.status.busy": "2024-08-29T21:13:08.663082Z", + "iopub.status.idle": "2024-08-29T21:13:09.867935Z", + "shell.execute_reply": "2024-08-29T21:13:09.866893Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03', '2011-01-04',\n", + " '2011-01-05', '2011-01-06', '2011-01-07', '2011-01-08',\n", + " '2011-01-09', '2011-01-10',\n", + " ...\n", + " '2011-12-23', '2011-12-24', '2011-12-25', '2011-12-26',\n", + " '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30',\n", + " '2011-12-31', '2012-01-01'],\n", + " dtype='datetime64[ns]', length=366, freq=None)" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "start = datetime.datetime(2011, 1, 1)\n", + "\n", + "end = datetime.datetime(2012, 1, 1)\n", + "\n", + "index = pd.date_range(start, end)\n", + "\n", + "index" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "c2661f9d-f500-4886-b199-159593a9d23c", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:09.884058Z", + "iopub.status.busy": "2024-08-29T21:13:09.883330Z", + "iopub.status.idle": "2024-08-29T21:13:09.894222Z", + "shell.execute_reply": "2024-08-29T21:13:09.893806Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',\n", + " '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',\n", + " '2011-01-13', '2011-01-14',\n", + " ...\n", + " '2011-12-19', '2011-12-20', '2011-12-21', '2011-12-22',\n", + " '2011-12-23', '2011-12-26', '2011-12-27', '2011-12-28',\n", + " '2011-12-29', '2011-12-30'],\n", + " dtype='datetime64[ns]', length=260, freq='B')" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "index = pd.bdate_range(start, end)\n", + "\n", + "index" + ] + }, + { + "cell_type": "markdown", + "id": "ff77a6bc-8197-4e48-b8e7-f182a3f6e742", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Convenience functions like date_range and bdate_range can utilize a variety of frequency aliases:" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "2d80a37d-b58e-4eee-a8e6-8cef919547cc", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:09.897578Z", + "iopub.status.busy": "2024-08-29T21:13:09.897407Z", + "iopub.status.idle": "2024-08-29T21:13:10.957551Z", + "shell.execute_reply": "2024-08-29T21:13:10.956700Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',\n", + " '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',\n", + " '2011-09-30', '2011-10-31',\n", + " ...\n", + " '2093-07-31', '2093-08-31', '2093-09-30', '2093-10-31',\n", + " '2093-11-30', '2093-12-31', '2094-01-31', '2094-02-28',\n", + " '2094-03-31', '2094-04-30'],\n", + " dtype='datetime64[ns]', length=1000, 
freq=None)" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(start, periods=1000, freq=\"ME\")" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "b2d8e0eb-b332-4ad6-a503-6d92d774c65a", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:10.962554Z", + "iopub.status.busy": "2024-08-29T21:13:10.962185Z", + "iopub.status.idle": "2024-08-29T21:13:10.969894Z", + "shell.execute_reply": "2024-08-29T21:13:10.969391Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-04-01', '2011-07-01', '2011-10-03',\n", + " '2012-01-02', '2012-04-02', '2012-07-02', '2012-10-01',\n", + " '2013-01-01', '2013-04-01',\n", + " ...\n", + " '2071-01-01', '2071-04-01', '2071-07-01', '2071-10-01',\n", + " '2072-01-01', '2072-04-01', '2072-07-01', '2072-10-03',\n", + " '2073-01-02', '2073-04-03'],\n", + " dtype='datetime64[ns]', length=250, freq='BQS-JAN')" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.bdate_range(start, periods=250, freq=\"BQS\")" + ] + }, + { + "cell_type": "markdown", + "id": "1a42453c-bea7-470a-be57-650f42fea9a5", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### date_range and bdate_range make it easy to generate a range of dates using various combinations of parameters like start, end, periods, and freq. The start and end dates are strictly inclusive, so dates outside of those specified will not be generated:" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "9a30b89e-2a20-488f-a3a4-6106c20a6b3b", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:10.974736Z", + "iopub.status.busy": "2024-08-29T21:13:10.974462Z", + "iopub.status.idle": "2024-08-29T21:13:11.083073Z", + "shell.execute_reply": "2024-08-29T21:13:11.082758Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "offset BME is not implemented in Snowpark pandas API", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[52], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mBME\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/general.py:2405\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 2401\u001b[0m \u001b[38;5;66;03m# If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints.\u001b[39;00m\n\u001b[1;32m 2402\u001b[0m \u001b[38;5;66;03m# If more than one of these inputs provides a timezone, require that they all agree.\u001b[39;00m\n\u001b[1;32m 2403\u001b[0m tz \u001b[38;5;241m=\u001b[39m _infer_tz_from_endpoints(start, end, tz)\n\u001b[0;32m-> 2405\u001b[0m qc \u001b[38;5;241m=\u001b[39m \u001b[43mSnowflakeQueryCompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_date_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2406\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2407\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2408\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2409\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2410\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2411\u001b[0m \u001b[43m \u001b[49m\u001b[43mleft_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2412\u001b[0m \u001b[43m \u001b[49m\u001b[43mright_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2413\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m \u001b[38;5;66;03m# Set date range as index column.\u001b[39;00m\n\u001b[1;32m 2415\u001b[0m qc \u001b[38;5;241m=\u001b[39m 
qc\u001b[38;5;241m.\u001b[39mset_index_from_columns(qc\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mtolist(), include_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:691\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_date_range\u001b[0;34m(cls, start, end, periods, freq, tz, left_inclusive, right_inclusive)\u001b[0m\n\u001b[1;32m 689\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m ns_values\u001b[38;5;241m.\u001b[39mseries_to_datetime()\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 691\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_irregular_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\n\u001b[1;32m 693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 694\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 695\u001b[0m \u001b[38;5;66;03m# Create a linearly spaced date_range in local time\u001b[39;00m\n\u001b[1;32m 696\u001b[0m \u001b[38;5;66;03m# This is the original pandas source code:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 701\u001b[0m \u001b[38;5;66;03m# Here we implement it similarly as np.linspace\u001b[39;00m\n\u001b[1;32m 702\u001b[0m div \u001b[38;5;241m=\u001b[39m periods \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;66;03m# type: ignore[operator]\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:216\u001b[0m, in \u001b[0;36mgenerate_irregular_range\u001b[0;34m(start, end, periods, 
offset)\u001b[0m\n\u001b[1;32m 213\u001b[0m periods \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 215\u001b[0m num_offsets \u001b[38;5;241m=\u001b[39m get_active_session()\u001b[38;5;241m.\u001b[39mrange(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, end\u001b[38;5;241m=\u001b[39mperiods, step\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 216\u001b[0m sf_date_or_time_part \u001b[38;5;241m=\u001b[39m \u001b[43m_offset_name_to_sf_date_or_time_part\u001b[49m\u001b[43m(\u001b[49m\u001b[43moffset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m dt_col \u001b[38;5;241m=\u001b[39m builtin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDATEADD\u001b[39m\u001b[38;5;124m\"\u001b[39m)(\n\u001b[1;32m 218\u001b[0m sf_date_or_time_part,\n\u001b[1;32m 219\u001b[0m offset\u001b[38;5;241m.\u001b[39mn \u001b[38;5;241m*\u001b[39m col(num_offsets\u001b[38;5;241m.\u001b[39mcolumns[\u001b[38;5;241m0\u001b[39m]),\n\u001b[1;32m 220\u001b[0m pandas_lit(start),\n\u001b[1;32m 221\u001b[0m )\n\u001b[1;32m 222\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m offset\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01min\u001b[39;00m LAST_DAY:\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# When last day is required, we need to explicitly call LAST_DAY SQL function to convert DATEADD results to the\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;66;03m# last day, e.g., adding one month to \"2/29/2024\" using DATEADD results \"3/29/2024\", which is not the last day\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# of March. So we need to call LAST_DAY. Also, LAST_DAY only return the date, then we need to reconstruct the\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# timestamp using timestamp_ntz_from_parts\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:162\u001b[0m, in \u001b[0;36m_offset_name_to_sf_date_or_time_part\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP:\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP[name]\n\u001b[0;32m--> 162\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moffset \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not implemented in Snowpark pandas API\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m 
\u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: offset BME is not implemented in Snowpark pandas API" + ] + } + ], + "source": [ + "pd.date_range(start, end, freq=\"BME\")" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "cb8484ed-fbd1-4331-a87a-4382e2eb4a64", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:11.097524Z", + "iopub.status.busy": "2024-08-29T21:13:11.097382Z", + "iopub.status.idle": "2024-08-29T21:13:12.139910Z", + "shell.execute_reply": "2024-08-29T21:13:12.138779Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-02', '2011-01-09', '2011-01-16', '2011-01-23',\n", + " '2011-01-30', '2011-02-06', '2011-02-13', '2011-02-20',\n", + " '2011-02-27', '2011-03-06', '2011-03-13', '2011-03-20',\n", + " '2011-03-27', '2011-04-03', '2011-04-10', '2011-04-17',\n", + " '2011-04-24', '2011-05-01', '2011-05-08', '2011-05-15',\n", + " '2011-05-22', '2011-05-29', '2011-06-05', '2011-06-12',\n", + " '2011-06-19', '2011-06-26', '2011-07-03', '2011-07-10',\n", + " '2011-07-17', '2011-07-24', '2011-07-31', '2011-08-07',\n", + " '2011-08-14', '2011-08-21', '2011-08-28', '2011-09-04',\n", + " '2011-09-11', '2011-09-18', '2011-09-25', '2011-10-02',\n", + " '2011-10-09', '2011-10-16', '2011-10-23', '2011-10-30',\n", + " '2011-11-06', '2011-11-13', '2011-11-20', '2011-11-27',\n", + " '2011-12-04', '2011-12-11', '2011-12-18', '2011-12-25',\n", + " '2012-01-01'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(start, end, freq=\"W\")" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "ed7a8ede-b4ac-40b7-a9ca-0523b8b2d908", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:12.143926Z", + "iopub.status.busy": "2024-08-29T21:13:12.143641Z", + "iopub.status.idle": "2024-08-29T21:13:12.150593Z", + "shell.execute_reply": "2024-08-29T21:13:12.149956Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-12-05', '2011-12-06', '2011-12-07', '2011-12-08',\n", + " '2011-12-09', '2011-12-12', '2011-12-13', '2011-12-14',\n", + " '2011-12-15', '2011-12-16', '2011-12-19', '2011-12-20',\n", + " '2011-12-21', '2011-12-22', '2011-12-23', '2011-12-26',\n", + " '2011-12-27', '2011-12-28', '2011-12-29', '2011-12-30'],\n", + " dtype='datetime64[ns]', freq='B')" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.bdate_range(end=end, periods=20)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "6ce60028-87c2-4d03-aca4-79d74c713b1d", + "metadata": { + "editable": true, + "execution": { + 
"iopub.execute_input": "2024-08-29T21:13:12.154722Z", + "iopub.status.busy": "2024-08-29T21:13:12.154470Z", + "iopub.status.idle": "2024-08-29T21:13:12.159125Z", + "shell.execute_reply": "2024-08-29T21:13:12.158630Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-01-04', '2011-01-05', '2011-01-06',\n", + " '2011-01-07', '2011-01-10', '2011-01-11', '2011-01-12',\n", + " '2011-01-13', '2011-01-14', '2011-01-17', '2011-01-18',\n", + " '2011-01-19', '2011-01-20', '2011-01-21', '2011-01-24',\n", + " '2011-01-25', '2011-01-26', '2011-01-27', '2011-01-28'],\n", + " dtype='datetime64[ns]', freq='B')" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.bdate_range(start=start, periods=20)" + ] + }, + { + "cell_type": "markdown", + "id": "b540e12a-5c2f-4f61-a074-a6a872680e42", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Specifying start, end, and periods will generate a range of evenly spaced dates from start to end inclusively, with periods number of elements in the resulting DatetimeIndex:\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "1470b8ca-270e-4701-bca0-751485a21d3e", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:12.162192Z", + "iopub.status.busy": "2024-08-29T21:13:12.162058Z", + "iopub.status.idle": "2024-08-29T21:13:13.160310Z", + "shell.execute_reply": "2024-08-29T21:13:13.158836Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',\n", + " '2018-01-05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(\"2018-01-01\", \"2018-01-05\", periods=5)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "2a0022ee-ba87-4c2b-8c41-16d9271cebe7", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:13.166386Z", + "iopub.status.busy": "2024-08-29T21:13:13.166138Z", + "iopub.status.idle": "2024-08-29T21:13:14.168199Z", + "shell.execute_reply": "2024-08-29T21:13:14.166045Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 10:40:00',\n", + " '2018-01-01 21:20:00', '2018-01-02 08:00:00',\n", + " '2018-01-02 18:40:00', '2018-01-03 05:20:00',\n", + " '2018-01-03 16:00:00', '2018-01-04 02:40:00',\n", + " '2018-01-04 13:20:00', '2018-01-05 00:00:00'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.date_range(\"2018-01-01\", \"2018-01-05\", periods=10)" + ] + }, + { + "cell_type": "markdown", + "id": "1960e1c1-1488-45af-9ac8-ba6ce9fe5d4e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Custom frequency ranges" + ] + }, + { + "cell_type": "markdown", + "id": "2d4f1cd9-4aad-4c44-98c1-65234836a294", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### bdate_range can also generate a range of custom frequency dates by using the 
weekmask and holidays parameters. These parameters will only be used if a custom frequency string is passed.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "2a322ecb-3c8d-443b-9ad0-a9a3b5ca68d5", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:14.175290Z", + "iopub.status.busy": "2024-08-29T21:13:14.174969Z", + "iopub.status.idle": "2024-08-29T21:13:14.183465Z", + "shell.execute_reply": "2024-08-29T21:13:14.182892Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-03', '2011-01-07', '2011-01-10', '2011-01-12',\n", + " '2011-01-14', '2011-01-17', '2011-01-19', '2011-01-21',\n", + " '2011-01-24', '2011-01-26',\n", + " ...\n", + " '2011-12-09', '2011-12-12', '2011-12-14', '2011-12-16',\n", + " '2011-12-19', '2011-12-21', '2011-12-23', '2011-12-26',\n", + " '2011-12-28', '2011-12-30'],\n", + " dtype='datetime64[ns]', length=154, freq='C')" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "weekmask = \"Mon Wed Fri\"\n", + "\n", + "holidays = [datetime.datetime(2011, 1, 5), datetime.datetime(2011, 3, 14)]\n", + "\n", + "pd.bdate_range(start, end, freq=\"C\", weekmask=weekmask, holidays=holidays)" + ] + }, + { + "cell_type": "markdown", + "id": "efb29c5a-0e24-4e0f-b535-68905f6739a4", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "cc530503-75c4-4aed-b8c4-b589609dc868", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:14.188084Z", + "iopub.status.busy": "2024-08-29T21:13:14.187833Z", + "iopub.status.idle": "2024-08-29T21:13:14.251094Z", + "shell.execute_reply": "2024-08-29T21:13:14.250539Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "offset BME is not implemented in Snowpark pandas API", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[59], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m rng \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mBME\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m ts \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mSeries(np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;28mlen\u001b[39m(rng)), index\u001b[38;5;241m=\u001b[39mrng)\n\u001b[1;32m 5\u001b[0m ts\u001b[38;5;241m.\u001b[39mindex\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m 
\u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking 
`query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/general.py:2405\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 2401\u001b[0m \u001b[38;5;66;03m# If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints.\u001b[39;00m\n\u001b[1;32m 2402\u001b[0m \u001b[38;5;66;03m# If more than one of these inputs provides a timezone, require that they all agree.\u001b[39;00m\n\u001b[1;32m 2403\u001b[0m tz \u001b[38;5;241m=\u001b[39m _infer_tz_from_endpoints(start, end, tz)\n\u001b[0;32m-> 2405\u001b[0m qc \u001b[38;5;241m=\u001b[39m \u001b[43mSnowflakeQueryCompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_date_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2406\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2407\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2408\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2409\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2410\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2411\u001b[0m \u001b[43m \u001b[49m\u001b[43mleft_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2412\u001b[0m \u001b[43m \u001b[49m\u001b[43mright_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2413\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m 
\u001b[38;5;66;03m# Set date range as index column.\u001b[39;00m\n\u001b[1;32m 2415\u001b[0m qc \u001b[38;5;241m=\u001b[39m qc\u001b[38;5;241m.\u001b[39mset_index_from_columns(qc\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mtolist(), include_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:691\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_date_range\u001b[0;34m(cls, start, end, periods, freq, tz, left_inclusive, right_inclusive)\u001b[0m\n\u001b[1;32m 689\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m ns_values\u001b[38;5;241m.\u001b[39mseries_to_datetime()\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 691\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_irregular_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\n\u001b[1;32m 693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 694\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 695\u001b[0m \u001b[38;5;66;03m# Create a linearly spaced date_range in local time\u001b[39;00m\n\u001b[1;32m 696\u001b[0m \u001b[38;5;66;03m# This is the original pandas source code:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 701\u001b[0m \u001b[38;5;66;03m# Here we implement it similarly as np.linspace\u001b[39;00m\n\u001b[1;32m 702\u001b[0m div \u001b[38;5;241m=\u001b[39m periods \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;66;03m# type: ignore[operator]\u001b[39;00m\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:216\u001b[0m, in \u001b[0;36mgenerate_irregular_range\u001b[0;34m(start, end, periods, offset)\u001b[0m\n\u001b[1;32m 213\u001b[0m periods \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 215\u001b[0m num_offsets \u001b[38;5;241m=\u001b[39m get_active_session()\u001b[38;5;241m.\u001b[39mrange(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, end\u001b[38;5;241m=\u001b[39mperiods, step\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 216\u001b[0m sf_date_or_time_part \u001b[38;5;241m=\u001b[39m \u001b[43m_offset_name_to_sf_date_or_time_part\u001b[49m\u001b[43m(\u001b[49m\u001b[43moffset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m dt_col \u001b[38;5;241m=\u001b[39m builtin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDATEADD\u001b[39m\u001b[38;5;124m\"\u001b[39m)(\n\u001b[1;32m 218\u001b[0m sf_date_or_time_part,\n\u001b[1;32m 219\u001b[0m offset\u001b[38;5;241m.\u001b[39mn \u001b[38;5;241m*\u001b[39m col(num_offsets\u001b[38;5;241m.\u001b[39mcolumns[\u001b[38;5;241m0\u001b[39m]),\n\u001b[1;32m 220\u001b[0m pandas_lit(start),\n\u001b[1;32m 221\u001b[0m )\n\u001b[1;32m 222\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m offset\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01min\u001b[39;00m LAST_DAY:\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# When last day is required, we need to explicitly call LAST_DAY SQL function to convert DATEADD results to the\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;66;03m# last day, e.g., adding one month to \"2/29/2024\" using DATEADD results \"3/29/2024\", which is not the last day\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# of March. So we need to call LAST_DAY. 
Also, LAST_DAY only return the date, then we need to reconstruct the\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# timestamp using timestamp_ntz_from_parts\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:162\u001b[0m, in \u001b[0;36m_offset_name_to_sf_date_or_time_part\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP:\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP[name]\n\u001b[0;32m--> 162\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moffset \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not implemented in Snowpark pandas API\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: offset BME is not implemented in Snowpark pandas API" + ] + } + ], + "source": [ + "rng = pd.date_range(start, end, freq=\"BME\")\n", + "\n", + "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", + "\n", + "ts.index" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "4e5da203-33d9-4c93-a14e-2d89a2510cf9", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:14.266591Z", + "iopub.status.busy": "2024-08-29T21:13:14.266382Z", + "iopub.status.idle": "2024-08-29T21:13:16.534061Z", + "shell.execute_reply": "2024-08-29T21:13:16.532702Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',\n", + " '2011-05-31', '2011-06-30', '2011-07-31', '2011-08-31',\n", + " '2011-09-30', '2011-10-31', '2011-11-30', '2011-12-31'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rng = 
pd.date_range(start, end, freq=\"ME\")\n", + "\n", + "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", + "\n", + "ts.index" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "fd70b5db-c29c-44dd-9dbf-860dd4099ef8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:16.540912Z", + "iopub.status.busy": "2024-08-29T21:13:16.540419Z", + "iopub.status.idle": "2024-08-29T21:13:17.091472Z", + "shell.execute_reply": "2024-08-29T21:13:17.088560Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31', '2011-04-30',\n", + " '2011-05-31'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[:5].index" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "5f1a58ce-ca6e-4cfd-8956-ec4910a9f862", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:17.096716Z", + "iopub.status.busy": "2024-08-29T21:13:17.096346Z", + "iopub.status.idle": "2024-08-29T21:13:17.662846Z", + "shell.execute_reply": "2024-08-29T21:13:17.662393Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2011-01-31', '2011-03-31', '2011-05-31', '2011-07-31',\n", + " '2011-09-30', '2011-11-30'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[::2].index" + ] + }, + { + "cell_type": "markdown", + "id": "4e303f4f-010b-447e-9c0c-ade4b3f0d250", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### To provide convenience for accessing longer time series, you can also pass in the year or year and month as strings:" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "93ca9d24-72e0-46ef-b844-ae04c53ffea4", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:17.665508Z", + "iopub.status.busy": "2024-08-29T21:13:17.665292Z", + "iopub.status.idle": "2024-08-29T21:13:18.492864Z", + "shell.execute_reply": "2024-08-29T21:13:18.492061Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-01-31 -0.953229\n", + "2011-02-28 0.928557\n", + "2011-03-31 -0.444832\n", + "2011-04-30 3.278272\n", + "2011-05-31 0.545594\n", + "2011-06-30 -1.074684\n", + "2011-07-31 -1.505286\n", + "2011-08-31 0.112716\n", + "2011-09-30 1.525040\n", + "2011-10-31 0.438627\n", + "2011-11-30 -1.456351\n", + "2011-12-31 2.020059\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[\"2011\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "b6a434c7-7865-45d6-86ba-2afeef7216c2", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:18.498185Z", + "iopub.status.busy": "2024-08-29T21:13:18.497811Z", + "iopub.status.idle": "2024-08-29T21:13:19.319099Z", + "shell.execute_reply": "2024-08-29T21:13:19.317369Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-06-30 -1.074684\n", + "Freq: None, dtype: float64" + ] + }, + 
"execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts[\"2011-6\"]" + ] + }, + { + "cell_type": "markdown", + "id": "885cf000-838e-4287-9870-2470afcb7fc8", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This type of slicing will work on a DataFrame with a DatetimeIndex as well. Since the partial string selection is a form of label slicing, the endpoints will be included. This would include matching times on an included date:" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "3254e5cd-cc94-479a-9c30-b679de04b870", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:19.329816Z", + "iopub.status.busy": "2024-08-29T21:13:19.329457Z", + "iopub.status.idle": "2024-08-29T21:13:31.791447Z", + "shell.execute_reply": "2024-08-29T21:13:31.790649Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-03-11 10:35:00 -0.337715\n", + "2013-03-11 10:36:00 -2.028548\n", + "2013-03-11 10:37:00 0.726182\n", + "2013-03-11 10:38:00 -1.167831\n", + "2013-03-11 10:39:00 -1.285208\n", + "\n", + "[100000 rows x 1 columns]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.random.seed(0)\n", + "\n", + "dft = pd.DataFrame(\n", + " np.random.randn(100000, 1),\n", + " columns=[\"A\"],\n", + " index=pd.date_range(\"20130101\", periods=100000, freq=\"min\"),\n", + ")\n", + "\n", + "\n", + "dft" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "4aa1a291-f13e-4b35-a73b-333411df51de", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:31.797703Z", + "iopub.status.busy": "2024-08-29T21:13:31.797160Z", + "iopub.status.idle": "2024-08-29T21:13:40.117417Z", + "shell.execute_reply": "2024-08-29T21:13:40.113166Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-03-11 10:35:00 -0.337715\n", + "2013-03-11 10:36:00 -2.028548\n", + "2013-03-11 10:37:00 0.726182\n", + "2013-03-11 10:38:00 -1.167831\n", + "2013-03-11 10:39:00 -1.285208\n", + "\n", + "[100000 rows x 1 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft.loc[\"2013\"]" + ] + }, + { + "cell_type": "markdown", + "id": "510776d8-1c0f-4c19-a8eb-bf3f7eb6d939", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This starts on the very first time in the month, and includes the last date and time for the month:" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "73ad4bae-beb3-4933-8a23-b66471bd2128", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:40.125450Z", + "iopub.status.busy": "2024-08-29T21:13:40.125049Z", + "iopub.status.idle": "2024-08-29T21:13:48.494065Z", + "shell.execute_reply": "2024-08-29T21:13:48.493171Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-28 23:55:00 -3.284701\n", + "2013-02-28 23:56:00 0.475275\n", + "2013-02-28 23:57:00 0.501877\n", + "2013-02-28 23:58:00 0.222138\n", + "2013-02-28 23:59:00 0.717464\n", + "\n", + "[84960 rows x 1 columns]" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1\":\"2013-2\"]" + ] + }, + { + "cell_type": "markdown", + "id": "d9dbd8b5-97a1-4458-9efe-69abc1000805", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This specifies a stop time that includes all of the times on the last day:" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "7541bba4-f71d-49d5-8901-c7882ceb04f0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:48.499398Z", + "iopub.status.busy": "2024-08-29T21:13:48.499040Z", + "iopub.status.idle": "2024-08-29T21:13:57.151139Z", + "shell.execute_reply": "2024-08-29T21:13:57.144730Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-28 23:55:00 -3.284701\n", + "2013-02-28 23:56:00 0.475275\n", + "2013-02-28 23:57:00 0.501877\n", + "2013-02-28 23:58:00 0.222138\n", + "2013-02-28 23:59:00 0.717464\n", + "\n", + "[84960 rows x 1 columns]" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1\":\"2013-2-28\"]" + ] + }, + { + "cell_type": "markdown", + "id": "2b90d0fd-8479-4741-b015-d8e8219c6e2c", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### This specifies an exact stop time (and is not the same as the above):" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "04deeb28-36e8-4c35-904a-02d71a9abe7c", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:13:57.155897Z", + "iopub.status.busy": "2024-08-29T21:13:57.155648Z", + "iopub.status.idle": "2024-08-29T21:14:05.751918Z", + "shell.execute_reply": "2024-08-29T21:14:05.750961Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-27 23:56:00 -0.036098\n", + "2013-02-27 23:57:00 -1.679458\n", + "2013-02-27 23:58:00 0.443969\n", + "2013-02-27 23:59:00 1.390478\n", + "2013-02-28 00:00:00 0.569440\n", + "\n", + "[83521 rows x 1 columns]" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1\":\"2013-2-28 00:00:00\"]" + ] + }, + { + "cell_type": "markdown", + "id": "f17d1737-ded7-4b19-8e20-718c80872edb", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### We are stopping on the included end-point as it is part of the index:" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "445ed641-74d9-4026-ad94-3172565e7db2", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:05.761408Z", + "iopub.status.busy": "2024-08-29T21:14:05.761002Z", + "iopub.status.idle": "2024-08-29T21:14:14.045516Z", + "shell.execute_reply": "2024-08-29T21:14:14.044532Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-15 00:00:00 -1.195459\n", + "2013-01-15 00:01:00 1.543360\n", + "2013-01-15 00:02:00 0.237914\n", + "2013-01-15 00:03:00 0.767214\n", + "2013-01-15 00:04:00 -2.109814\n", + "... ...\n", + "2013-01-15 12:26:00 0.817564\n", + "2013-01-15 12:27:00 -0.649760\n", + "2013-01-15 12:28:00 1.245159\n", + "2013-01-15 12:29:00 0.300473\n", + "2013-01-15 12:30:00 1.167551\n", + "\n", + "[751 rows x 1 columns]" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\"2013-1-15\":\"2013-1-15 12:30:00\"]" + ] + }, + { + "cell_type": "markdown", + "id": "d87d800b-29fb-4b93-a28f-0de738a3ae07", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### DatetimeIndex partial string indexing also works on a DataFrame with a MultiIndex:" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "e8c3388b-374a-44a8-a060-b5531589bbf6", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:14.053801Z", + "iopub.status.busy": "2024-08-29T21:14:14.053416Z", + "iopub.status.idle": "2024-08-29T21:14:16.356105Z", + "shell.execute_reply": "2024-08-29T21:14:16.354167Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 a -0.483797\n", + " b 1.288057\n", + "2013-01-01 12:00:00 a -0.129879\n", + " b -0.198078\n", + "2013-01-02 00:00:00 a -0.334488\n", + " b -0.391443\n", + "2013-01-02 12:00:00 a -0.612406\n", + " b -0.676524\n", + "2013-01-03 00:00:00 a 1.327230\n", + " b -0.448695\n", + "2013-01-03 12:00:00 a -0.316407\n", + " b 0.030831\n", + "2013-01-04 00:00:00 a -0.313357\n", + " b -0.173259\n", + "2013-01-04 12:00:00 a -0.327369\n", + " b 0.944368\n", + "2013-01-05 00:00:00 a 1.122017\n", + " b 0.112339\n", + "2013-01-05 12:00:00 a 1.372340\n", + " b 2.062562" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft2 = pd.DataFrame(\n", + " np.random.randn(20, 1),\n", + " columns=[\"A\"],\n", + " index=pd.MultiIndex.from_product(\n", + " [pd.date_range(\"20130101\", periods=10, freq=\"12h\"), [\"a\", \"b\"]]\n", + " ),\n", + ")\n", + "\n", + "\n", + "dft2" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "cf34669d-75ef-4baf-89e0-c00b284fb23d", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:16.364311Z", + "iopub.status.busy": "2024-08-29T21:14:16.363818Z", + "iopub.status.idle": "2024-08-29T21:14:17.890076Z", + "shell.execute_reply": "2024-08-29T21:14:17.889537Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " A\n", + "2013-01-05 00:00:00 a 1.122017\n", + " b 0.112339\n", + "2013-01-05 12:00:00 a 1.372340\n", + " b 2.062562" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft2.loc[\"2013-01-05\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "4fe86ab1-49df-4661-a653-b744cf985345", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:17.893683Z", + "iopub.status.busy": "2024-08-29T21:14:17.893463Z", + "iopub.status.idle": "2024-08-29T21:14:17.942532Z", + "shell.execute_reply": "2024-08-29T21:14:17.942111Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method DataFrame.swaplevel", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[73], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m idx \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mIndexSlice\n\u001b[0;32m----> 3\u001b[0m dft2 \u001b[38;5;241m=\u001b[39m \u001b[43mdft2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mswaplevel\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msort_index()\n\u001b[1;32m 5\u001b[0m dft2\u001b[38;5;241m.\u001b[39mloc[idx[:, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2013-01-05\u001b[39m\u001b[38;5;124m\"\u001b[39m], :]\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method DataFrame.swaplevel" + ] + } + ], + "source": [ + "idx = pd.IndexSlice\n", + "\n", + "dft2 = dft2.swaplevel(0, 1).sort_index()\n", + "\n", + "dft2.loc[idx[:, \"2013-01-05\"], :]" + ] + }, + { + "cell_type": "markdown", + "id": "0596aa8e-1aa3-4854-91f0-2f512fd2d443", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### Slicing with string indexing also honors UTC offset." + ] + }, + { + "cell_type": "markdown", + "id": "ad1362d9-364e-443a-b1fe-0029840d7d61", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Slice vs. exact match" + ] + }, + { + "cell_type": "markdown", + "id": "80b2fc81-d35b-4ed3-80af-baa52d5c288e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match.\n", + "\n", + "Consider a Series object with a minute resolution index:" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "cfaa9748-de47-4992-9b34-14da93db7b14", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:17.948629Z", + "iopub.status.busy": "2024-08-29T21:14:17.948432Z", + "iopub.status.idle": "2024-08-29T21:14:18.805828Z", + "shell.execute_reply": "2024-08-29T21:14:18.805536Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Index.resolution is not yet implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:250\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DatetimeIndex' object has no attribute 'resolution'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[74], line 9\u001b[0m\n\u001b[1;32m 1\u001b[0m series_minute \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mSeries(\n\u001b[1;32m 2\u001b[0m [\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m2\u001b[39m, \u001b[38;5;241m3\u001b[39m],\n\u001b[1;32m 3\u001b[0m 
pd\u001b[38;5;241m.\u001b[39mDatetimeIndex(\n\u001b[1;32m 4\u001b[0m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2011-12-31 23:59:00\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01-01 00:00:00\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01-01 00:02:00\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[1;32m 5\u001b[0m ),\n\u001b[1;32m 6\u001b[0m )\n\u001b[0;32m----> 9\u001b[0m \u001b[43mseries_minute\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresolution\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:257\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 253\u001b[0m native_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_NATIVE_INDEX_TYPE([])\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(native_index, key):\n\u001b[1;32m 255\u001b[0m \u001b[38;5;66;03m# Any methods that not supported by the current Index.py but exist in a\u001b[39;00m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;66;03m# native pandas index object should raise a not implemented error for now.\u001b[39;00m\n\u001b[0;32m--> 257\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 258\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mIndex.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mkey\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not yet implemented\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 259\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Index.resolution is not yet implemented" + ] + } + ], + "source": [ + "series_minute = pd.Series(\n", + " [1, 2, 3],\n", + " pd.DatetimeIndex(\n", + " [\"2011-12-31 23:59:00\", \"2012-01-01 00:00:00\", \"2012-01-01 00:02:00\"]\n", + " ),\n", + ")\n", + "\n", + "\n", + "series_minute.index.resolution" + ] + }, + { + "cell_type": "markdown", + "id": "424987b9-88eb-4e81-b9b4-e4e188fabda6", + "metadata": { + "editable": true, + 
"slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "##### A timestamp string less accurate than a minute gives a Series object." + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "b8997514-bb79-4da7-bcbb-e0262d04412f", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:18.807978Z", + "iopub.status.busy": "2024-08-29T21:14:18.807841Z", + "iopub.status.idle": "2024-08-29T21:14:19.436725Z", + "shell.execute_reply": "2024-08-29T21:14:19.436157Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-12-31 23:59:00 1\n", + "Freq: None, dtype: int64" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "series_minute[\"2011-12-31 23\"] # we return series instead" + ] + }, + { + "cell_type": "markdown", + "id": "971505bf-eed4-4fdc-a764-4572b25b353b", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Exact indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "1a24da8a-4736-4120-bb69-5380b6b59f53", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:19.439721Z", + "iopub.status.busy": "2024-08-29T21:14:19.439536Z", + "iopub.status.idle": "2024-08-29T21:14:28.000648Z", + "shell.execute_reply": "2024-08-29T21:14:27.999577Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-01 00:00:001.764052
2013-01-01 00:01:000.400157
2013-01-01 00:02:000.978738
2013-01-01 00:03:002.240893
2013-01-01 00:04:001.867558
......
2013-02-27 23:56:00-0.036098
2013-02-27 23:57:00-1.679458
2013-02-27 23:58:000.443969
2013-02-27 23:59:001.390478
2013-02-28 00:00:000.569440
\n", + "

83521 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 00:00:00 1.764052\n", + "2013-01-01 00:01:00 0.400157\n", + "2013-01-01 00:02:00 0.978738\n", + "2013-01-01 00:03:00 2.240893\n", + "2013-01-01 00:04:00 1.867558\n", + "... ...\n", + "2013-02-27 23:56:00 -0.036098\n", + "2013-02-27 23:57:00 -1.679458\n", + "2013-02-27 23:58:00 0.443969\n", + "2013-02-27 23:59:00 1.390478\n", + "2013-02-28 00:00:00 0.569440\n", + "\n", + "[83521 rows x 1 columns]" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[datetime.datetime(2013, 1, 1): datetime.datetime(2013, 2, 28)]" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "d62bee69-00d5-4fed-bf7c-5f6aee22eade", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:28.008282Z", + "iopub.status.busy": "2024-08-29T21:14:28.007924Z", + "iopub.status.idle": "2024-08-29T21:14:36.438112Z", + "shell.execute_reply": "2024-08-29T21:14:36.434977Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
A
2013-01-01 10:12:000.605120
2013-01-01 10:13:000.895556
2013-01-01 10:14:00-0.131909
2013-01-01 10:15:000.404762
2013-01-01 10:16:000.223844
......
2013-02-28 10:08:000.746108
2013-02-28 10:09:001.754498
2013-02-28 10:10:00-0.622373
2013-02-28 10:11:00-0.449793
2013-02-28 10:12:000.848123
\n", + "

83521 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " A\n", + "2013-01-01 10:12:00 0.605120\n", + "2013-01-01 10:13:00 0.895556\n", + "2013-01-01 10:14:00 -0.131909\n", + "2013-01-01 10:15:00 0.404762\n", + "2013-01-01 10:16:00 0.223844\n", + "... ...\n", + "2013-02-28 10:08:00 0.746108\n", + "2013-02-28 10:09:00 1.754498\n", + "2013-02-28 10:10:00 -0.622373\n", + "2013-02-28 10:11:00 -0.449793\n", + "2013-02-28 10:12:00 0.848123\n", + "\n", + "[83521 rows x 1 columns]" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dft[\n", + " datetime.datetime(2013, 1, 1, 10, 12, 0): datetime.datetime(\n", + " 2013, 2, 28, 10, 12, 0\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "0231045c-df35-4892-9069-8819d41a8d55", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Truncating & fancy indexing" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "62e2459c-ab53-4f7c-aea7-a623717d4f36", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:36.443746Z", + "iopub.status.busy": "2024-08-29T21:14:36.443343Z", + "iopub.status.idle": "2024-08-29T21:14:38.169327Z", + "shell.execute_reply": "2024-08-29T21:14:38.168887Z" + }, + "scrolled": true, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method Series.truncate", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[78], line 5\u001b[0m\n\u001b[1;32m 1\u001b[0m rng2 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2011-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2012-01-01\u001b[39m\u001b[38;5;124m\"\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mW\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 3\u001b[0m ts2 \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mSeries(np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;28mlen\u001b[39m(rng2)), index\u001b[38;5;241m=\u001b[39mrng2)\n\u001b[0;32m----> 5\u001b[0m \u001b[43mts2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtruncate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbefore\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2011-11\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mafter\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2011-12\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): 
\u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in 
https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) 
\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method Series.truncate" + ] + } + ], + "source": [ + "rng2 = pd.date_range(\"2011-01-01\", \"2012-01-01\", freq=\"W\")\n", + "\n", + "ts2 = pd.Series(np.random.randn(len(rng2)), index=rng2)\n", + "\n", + "ts2.truncate(before=\"2011-11\", after=\"2011-12\")" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "6d0d2c8b-205f-4a57-b7bd-301c7531b1ad", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:38.175988Z", + "iopub.status.busy": "2024-08-29T21:14:38.175829Z", + "iopub.status.idle": "2024-08-29T21:14:38.947568Z", + "shell.execute_reply": "2024-08-29T21:14:38.946986Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-11-06 -0.433416\n", + "2011-11-13 0.773872\n", + "2011-11-20 -0.834212\n", + "2011-11-27 -0.728240\n", + "2011-12-04 0.674975\n", + "2011-12-11 -0.477772\n", + "2011-12-18 1.492301\n", + "2011-12-25 -0.658391\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts2[\"2011-11\":\"2011-12\"]" + ] + }, + { + "cell_type": "markdown", + "id": "362a58c6-68a8-4103-96dd-569752c6d199", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Time/date components" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "0a5647ae-a168-4c3e-98a5-381cdd2c61c2", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:38.952582Z", + "iopub.status.busy": "2024-08-29T21:14:38.952189Z", + "iopub.status.idle": "2024-08-29T21:14:39.580562Z", + "shell.execute_reply": "2024-08-29T21:14:39.580158Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Index.isocalendar is not yet implemented", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:250\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 249\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 250\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mobject\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__getattribute__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m 
err:\n", + "\u001b[0;31mAttributeError\u001b[0m: 'DatetimeIndex' object has no attribute 'isocalendar'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[80], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m idx \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mdate_range(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m2019-12-29\u001b[39m\u001b[38;5;124m\"\u001b[39m, freq\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mD\u001b[39m\u001b[38;5;124m\"\u001b[39m, periods\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m4\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43midx\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43misocalendar\u001b[49m()\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/extensions/index.py:257\u001b[0m, in \u001b[0;36mIndex.__getattr__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 253\u001b[0m native_index \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_NATIVE_INDEX_TYPE([])\n\u001b[1;32m 254\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(native_index, key):\n\u001b[1;32m 255\u001b[0m \u001b[38;5;66;03m# Any methods that not supported by the current Index.py but exist in a\u001b[39;00m\n\u001b[1;32m 256\u001b[0m \u001b[38;5;66;03m# native pandas index object should raise a not implemented error for now.\u001b[39;00m\n\u001b[0;32m--> 257\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 258\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mIndex.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mkey\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not yet implemented\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 259\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 260\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m err\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Index.isocalendar is not yet implemented" + ] + } + ], + "source": [ + "idx = pd.date_range(start=\"2019-12-29\", freq=\"D\", periods=4)\n", + "\n", + "idx.isocalendar()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 81, + "id": "0c48589b-bcf8-4d43-a1d6-d2df60ff0e0f", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:39.583089Z", + "iopub.status.busy": "2024-08-29T21:14:39.582784Z", + "iopub.status.idle": "2024-08-29T21:14:41.713938Z", + "shell.execute_reply": "2024-08-29T21:14:41.713060Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
yearweekday
2019-12-292019527
2019-12-30202011
2019-12-31202012
2020-01-01202013
\n", + "
" + ], + "text/plain": [ + " year week day\n", + "2019-12-29 2019 52 7\n", + "2019-12-30 2020 1 1\n", + "2019-12-31 2020 1 2\n", + "2020-01-01 2020 1 3" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "idx.to_series().dt.isocalendar()" + ] + }, + { + "cell_type": "markdown", + "id": "9b5b10c8-e72f-4405-bdeb-1bced14c8edf", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# DateOffset objects" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "1febbd6a-1b57-4e6a-a48a-3eac565ad61d", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:41.721027Z", + "iopub.status.busy": "2024-08-29T21:14:41.720702Z", + "iopub.status.idle": "2024-08-29T21:14:41.729443Z", + "shell.execute_reply": "2024-08-29T21:14:41.728809Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')" + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts = pd.Timestamp(\"2016-10-30 00:00:00\", tz=\"Europe/Helsinki\")\n", + "\n", + "ts + pd.Timedelta(days=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "b8d03fb0-826f-4698-a6d0-f2b63f7d38dc", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:41.733948Z", + "iopub.status.busy": "2024-08-29T21:14:41.733522Z", + "iopub.status.idle": "2024-08-29T21:14:41.737568Z", + "shell.execute_reply": "2024-08-29T21:14:41.737119Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts + pd.DateOffset(days=1)" + ] + }, + { + "cell_type": "markdown", + "id": "7d4c2685-79b1-4262-9c53-e0901b730d36", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Time Series-related instance methods" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "7544f420-459f-4320-89f4-a9897ad4daf8", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:41.742020Z", + "iopub.status.busy": "2024-08-29T21:14:41.741770Z", + "iopub.status.idle": "2024-08-29T21:14:43.045122Z", + "shell.execute_reply": "2024-08-29T21:14:43.043759Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2011-01-31 NaN\n", + "2011-02-28 0.0\n", + "2011-03-31 1.0\n", + "2011-04-30 2.0\n", + "2011-05-31 3.0\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ts = pd.Series(range(len(rng)), index=rng)\n", + "\n", + "ts = ts[:5]\n", + "\n", + "ts.shift(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "ee59ce08-9bd7-4fc2-93e1-55868cb47028", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:43.050774Z", + "iopub.status.busy": "2024-08-29T21:14:43.050375Z", + "iopub.status.idle": "2024-08-29T21:14:43.284463Z", + "shell.execute_reply": "2024-08-29T21:14:43.284107Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "shifting 
index values not yet supported.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[85], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m 
session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:2551\u001b[0m, in \u001b[0;36mSeries.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 2547\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 2548\u001b[0m \u001b[38;5;66;03m# pandas compatible error.\u001b[39;00m\n\u001b[1;32m 2549\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo axis named 1 for object type 
Series\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m 
telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/base.py:3125\u001b[0m, in \u001b[0;36mBasePandasDataset.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 3122\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;241m==\u001b[39m no_default:\n\u001b[1;32m 3123\u001b[0m fill_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3125\u001b[0m new_query_compiler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query_compiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3126\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\n\u001b[1;32m 3127\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_or_update_from_compiler(new_query_compiler, \u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1672\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 1669\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_shift_values(periods, axis, fill_value) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1670\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1671\u001b[0m \u001b[38;5;66;03m# axis parameter ignored, should be 0 for manipulating index. 
Revisit in SNOW-1023324\u001b[39;00m\n\u001b[0;32m-> 1672\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_shift_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1636\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler._shift_index\u001b[0;34m(self, periods, freq)\u001b[0m\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m freq \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfreq must be specified when calling shift index\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1635\u001b[0m \u001b[38;5;66;03m# TODO: SNOW-1023324, implement shifting index only.\u001b[39;00m\n\u001b[0;32m-> 1636\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshifting index values not yet supported.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: shifting index values not yet supported." + ] + } + ], + "source": [ + "ts.shift(5, freq=\"D\")" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "3548edbe-b2c0-4d7b-b111-b90931af0306", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:43.306275Z", + "iopub.status.busy": "2024-08-29T21:14:43.306123Z", + "iopub.status.idle": "2024-08-29T21:14:43.358939Z", + "shell.execute_reply": "2024-08-29T21:14:43.358609Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "shifting index values not yet supported.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[86], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moffsets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mBDay\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:2551\u001b[0m, in \u001b[0;36mSeries.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 2547\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 2548\u001b[0m \u001b[38;5;66;03m# pandas compatible error.\u001b[39;00m\n\u001b[1;32m 2549\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo axis named 1 for object type Series\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/base.py:3125\u001b[0m, in \u001b[0;36mBasePandasDataset.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 3122\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;241m==\u001b[39m no_default:\n\u001b[1;32m 3123\u001b[0m fill_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3125\u001b[0m new_query_compiler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query_compiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3126\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\n\u001b[1;32m 3127\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_or_update_from_compiler(new_query_compiler, \u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1672\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 1669\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_shift_values(periods, axis, fill_value) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1670\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1671\u001b[0m \u001b[38;5;66;03m# axis parameter ignored, should be 0 for manipulating index. Revisit in SNOW-1023324\u001b[39;00m\n\u001b[0;32m-> 1672\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_shift_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1636\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler._shift_index\u001b[0;34m(self, periods, freq)\u001b[0m\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m freq \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfreq must be specified when calling shift index\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1635\u001b[0m \u001b[38;5;66;03m# TODO: SNOW-1023324, implement shifting index 
only.\u001b[39;00m\n\u001b[0;32m-> 1636\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshifting index values not yet supported.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: shifting index values not yet supported." + ] + } + ], + "source": [ + "ts.shift(5, freq=pd.offsets.BDay())" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "a980c67a-1717-425b-b74f-a277cf73f576", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:43.380989Z", + "iopub.status.busy": "2024-08-29T21:14:43.380843Z", + "iopub.status.idle": "2024-08-29T21:14:43.436653Z", + "shell.execute_reply": "2024-08-29T21:14:43.436299Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "shifting index values not yet supported.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[87], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mBME\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m 
\u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents 
telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/series.py:2551\u001b[0m, in \u001b[0;36mSeries.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 2547\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m 2548\u001b[0m \u001b[38;5;66;03m# pandas compatible error.\u001b[39;00m\n\u001b[1;32m 2549\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo axis named 1 for object type Series\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 2551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/base.py:3125\u001b[0m, in \u001b[0;36mBasePandasDataset.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 3122\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;241m==\u001b[39m no_default:\n\u001b[1;32m 3123\u001b[0m fill_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 3125\u001b[0m new_query_compiler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query_compiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshift\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3126\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfill_value\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msuffix\u001b[49m\n\u001b[1;32m 3127\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3128\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_or_update_from_compiler(new_query_compiler, \u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1672\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.shift\u001b[0;34m(self, periods, freq, axis, fill_value, suffix)\u001b[0m\n\u001b[1;32m 1669\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_shift_values(periods, axis, fill_value) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 1670\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1671\u001b[0m \u001b[38;5;66;03m# axis parameter ignored, should be 0 for manipulating index. Revisit in SNOW-1023324\u001b[39;00m\n\u001b[0;32m-> 1672\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_shift_index\u001b[49m\u001b[43m(\u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:1636\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler._shift_index\u001b[0;34m(self, periods, freq)\u001b[0m\n\u001b[1;32m 1633\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m freq \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfreq must be specified when calling shift index\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1635\u001b[0m \u001b[38;5;66;03m# TODO: SNOW-1023324, implement shifting index 
only.\u001b[39;00m\n\u001b[0;32m-> 1636\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mshifting index values not yet supported.\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: shifting index values not yet supported." + ] + } + ], + "source": [ + "ts.shift(5, freq=\"BME\")" + ] + }, + { + "cell_type": "markdown", + "id": "00d1f5cf-c073-4c60-949b-404953b80000", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "## Frequency conversion" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "0a0b12b0-3ed9-476e-b98c-c1d04162627b", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:43.455819Z", + "iopub.status.busy": "2024-08-29T21:14:43.455677Z", + "iopub.status.idle": "2024-08-29T21:14:43.498018Z", + "shell.execute_reply": "2024-08-29T21:14:43.497707Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "offset B is not implemented in Snowpark pandas API", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[88], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m dr \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m1/1/2010\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moffsets\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mBDay\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m ts \u001b[38;5;241m=\u001b[39m 
pd\u001b[38;5;241m.\u001b[39mSeries(np\u001b[38;5;241m.\u001b[39mrandom\u001b[38;5;241m.\u001b[39mrandn(\u001b[38;5;241m3\u001b[39m), index\u001b[38;5;241m=\u001b[39mdr)\n\u001b[1;32m 5\u001b[0m ts\u001b[38;5;241m.\u001b[39masfreq(pd\u001b[38;5;241m.\u001b[39moffsets\u001b[38;5;241m.\u001b[39mBDay())\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File 
\u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/pandas/general.py:2405\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 2401\u001b[0m \u001b[38;5;66;03m# If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints.\u001b[39;00m\n\u001b[1;32m 2402\u001b[0m \u001b[38;5;66;03m# If more than one of these inputs provides a timezone, require that they all agree.\u001b[39;00m\n\u001b[1;32m 2403\u001b[0m tz \u001b[38;5;241m=\u001b[39m _infer_tz_from_endpoints(start, end, tz)\n\u001b[0;32m-> 2405\u001b[0m qc \u001b[38;5;241m=\u001b[39m \u001b[43mSnowflakeQueryCompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_date_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2406\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2407\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2408\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2409\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2410\u001b[0m \u001b[43m \u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2411\u001b[0m \u001b[43m \u001b[49m\u001b[43mleft_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2412\u001b[0m \u001b[43m \u001b[49m\u001b[43mright_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2413\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2414\u001b[0m \u001b[38;5;66;03m# Set date range as index column.\u001b[39;00m\n\u001b[1;32m 2415\u001b[0m qc \u001b[38;5;241m=\u001b[39m qc\u001b[38;5;241m.\u001b[39mset_index_from_columns(qc\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mtolist(), include_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:691\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_date_range\u001b[0;34m(cls, start, end, periods, freq, tz, left_inclusive, right_inclusive)\u001b[0m\n\u001b[1;32m 689\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m ns_values\u001b[38;5;241m.\u001b[39mseries_to_datetime()\n\u001b[1;32m 690\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 691\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_irregular_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 692\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mfreq\u001b[49m\n\u001b[1;32m 693\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 694\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 695\u001b[0m \u001b[38;5;66;03m# Create a linearly spaced date_range in local time\u001b[39;00m\n\u001b[1;32m 696\u001b[0m \u001b[38;5;66;03m# This is the original pandas source code:\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[1;32m 701\u001b[0m \u001b[38;5;66;03m# Here we implement it similarly as np.linspace\u001b[39;00m\n\u001b[1;32m 702\u001b[0m div \u001b[38;5;241m=\u001b[39m periods \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;66;03m# type: ignore[operator]\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:216\u001b[0m, in \u001b[0;36mgenerate_irregular_range\u001b[0;34m(start, end, periods, offset)\u001b[0m\n\u001b[1;32m 213\u001b[0m periods \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 215\u001b[0m num_offsets \u001b[38;5;241m=\u001b[39m get_active_session()\u001b[38;5;241m.\u001b[39mrange(start\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, end\u001b[38;5;241m=\u001b[39mperiods, step\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 216\u001b[0m sf_date_or_time_part \u001b[38;5;241m=\u001b[39m \u001b[43m_offset_name_to_sf_date_or_time_part\u001b[49m\u001b[43m(\u001b[49m\u001b[43moffset\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mname\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 217\u001b[0m dt_col \u001b[38;5;241m=\u001b[39m builtin(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDATEADD\u001b[39m\u001b[38;5;124m\"\u001b[39m)(\n\u001b[1;32m 218\u001b[0m sf_date_or_time_part,\n\u001b[1;32m 219\u001b[0m offset\u001b[38;5;241m.\u001b[39mn \u001b[38;5;241m*\u001b[39m col(num_offsets\u001b[38;5;241m.\u001b[39mcolumns[\u001b[38;5;241m0\u001b[39m]),\n\u001b[1;32m 220\u001b[0m pandas_lit(start),\n\u001b[1;32m 221\u001b[0m )\n\u001b[1;32m 222\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m offset\u001b[38;5;241m.\u001b[39mname \u001b[38;5;129;01min\u001b[39;00m LAST_DAY:\n\u001b[1;32m 223\u001b[0m \u001b[38;5;66;03m# When last day is required, we need to explicitly call LAST_DAY SQL function to convert DATEADD results to the\u001b[39;00m\n\u001b[1;32m 224\u001b[0m \u001b[38;5;66;03m# last day, e.g., adding one month to \"2/29/2024\" using DATEADD results \"3/29/2024\", which is not the last day\u001b[39;00m\n\u001b[1;32m 225\u001b[0m \u001b[38;5;66;03m# of March. So we need to call LAST_DAY. 
Also, LAST_DAY only return the date, then we need to reconstruct the\u001b[39;00m\n\u001b[1;32m 226\u001b[0m \u001b[38;5;66;03m# timestamp using timestamp_ntz_from_parts\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:162\u001b[0m, in \u001b[0;36m_offset_name_to_sf_date_or_time_part\u001b[0;34m(name)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP:\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m OFFSET_NAME_TO_SF_DATE_OR_TIME_PART_MAP[name]\n\u001b[0;32m--> 162\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 163\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43moffset \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m is not implemented in Snowpark pandas API\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 164\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: offset B is not implemented in Snowpark pandas API" + ] + } + ], + "source": [ + "dr = pd.date_range(\"1/1/2010\", periods=3, freq=3 * pd.offsets.BDay())\n", + "\n", + "ts = pd.Series(np.random.randn(3), index=dr)\n", + "\n", + "ts.asfreq(pd.offsets.BDay())" + ] + }, + { + "cell_type": "markdown", + "id": "0bf19b9b-5cf1-4d1e-958b-a056a366464e", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "# Converting between representations" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "a0e7fb20-dd4a-456a-8557-aeea61ac0c3b", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:43.513174Z", + "iopub.status.busy": "2024-08-29T21:14:43.513031Z", + "iopub.status.idle": "2024-08-29T21:14:45.394562Z", + "shell.execute_reply": "2024-08-29T21:14:45.393409Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2012-01-31 -0.513578\n", + "2012-02-29 -0.471256\n", + "2012-03-31 1.608285\n", + "2012-04-30 
-1.384413\n", + "2012-05-31 2.278423\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rng = pd.date_range(\"1/1/2012\", periods=5, freq=\"ME\")\n", + "\n", + "ts = pd.Series(np.random.randn(len(rng)), index=rng)\n", + "\n", + "ts" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "96b177da-d407-4297-bcc5-d7f2a8b4ee12", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.400225Z", + "iopub.status.busy": "2024-08-29T21:14:45.399854Z", + "iopub.status.idle": "2024-08-29T21:14:45.449274Z", + "shell.execute_reply": "2024-08-29T21:14:45.448910Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NotImplementedError", + "evalue": "Snowpark pandas does not yet support the method Series.to_period", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[90], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m ps \u001b[38;5;241m=\u001b[39m \u001b[43mts\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mto_period\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3\u001b[0m ps\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:414\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 407\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 408\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 409\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[38;5;66;03m# hints in-line here. 
We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 413\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 414\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 415\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 416\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 417\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 418\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 419\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 420\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_method_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_method_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:341\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n\u001b[0;32m--> 341\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 343\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 344\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/_internal/telemetry.py:327\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name, property_method_type)\u001b[0m\n\u001b[1;32m 321\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 322\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 323\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 324\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 325\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API 
calls.\u001b[39;00m\n\u001b[1;32m 326\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 327\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 328\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 329\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 330\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 331\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 332\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 339\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 340\u001b[0m )\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:117\u001b[0m, in \u001b[0;36m_make_not_implemented_decorator..not_implemented_decorator..make_error_raiser..raise_not_implemented_method_error\u001b[0;34m(cls_or_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 116\u001b[0m non_null_attribute_prefix \u001b[38;5;241m=\u001b[39m attribute_prefix\n\u001b[0;32m--> 117\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 118\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43m_snowpark_pandas_does_not_yet_support\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m method \u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mnon_null_attribute_prefix\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mname\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 119\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m/opt/homebrew/Caskroom/miniconda/base/envs/snowpandas-dev-3.9/lib/python3.9/site-packages/snowflake/snowpark/modin/plugin/utils/error_message.py:163\u001b[0m, in \u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 161\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 162\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 
163\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", + "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas does not yet support the method Series.to_period" + ] + } + ], + "source": [ + "ps = ts.to_period()\n", + "\n", + "ps" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "781d74b4-a7fd-4f58-a8af-fa21598502b0", + "metadata": { + "editable": true, + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.455603Z", + "iopub.status.busy": "2024-08-29T21:14:45.455404Z", + "iopub.status.idle": "2024-08-29T21:14:45.464053Z", + "shell.execute_reply": "2024-08-29T21:14:45.463774Z" + }, + "slideshow": { + "slide_type": "" + }, + "tags": [ + "raises-exception" + ] + }, + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'ps' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[91], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mps\u001b[49m\u001b[38;5;241m.\u001b[39mto_timestamp()\n", + "\u001b[0;31mNameError\u001b[0m: name 'ps' is not defined" + ] + } + ], + "source": [ + "ps.to_timestamp()" + ] + }, + { + "cell_type": "markdown", + "id": "bf270f9c-86e0-4161-a187-7b0dbb2e47b6", + "metadata": { + "editable": true, + "slideshow": { + "slide_type": "" + }, + "tags": [] + }, + "source": [ + "### Timestamp Binary Operations" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "dd818a8d-97c1-46f3-b29f-499ba92f22ae", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.465935Z", + "iopub.status.busy": "2024-08-29T21:14:45.465805Z", + "iopub.status.idle": "2024-08-29T21:14:45.468720Z", + "shell.execute_reply": "2024-08-29T21:14:45.468476Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('396 days 03:00:00')" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.to_datetime('2018-10-26 12:00:00') - pd.to_datetime('2017-09-25 09:00:00')" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "7c9a87d2-7883-46a6-8433-dfa5900ca9b0", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.470366Z", + "iopub.status.busy": "2024-08-29T21:14:45.470260Z", + "iopub.status.idle": "2024-08-29T21:14:45.472362Z", + "shell.execute_reply": "2024-08-29T21:14:45.472118Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('6 days 07:00:00')" + ] + }, + "execution_count": 93, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Timestamp(\"2014-08-01 10:00\") - pd.Timestamp(\"2014-07-26 03:00\")" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "e78454b1-0d4c-42bc-a127-b21a4a7f09cf", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.474079Z", + "iopub.status.busy": "2024-08-29T21:14:45.473971Z", + "iopub.status.idle": "2024-08-29T21:14:45.476309Z", + "shell.execute_reply": "2024-08-29T21:14:45.476045Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('682 days 03:00:00')" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.Timestamp(year=2017, month=1, day=1, hour=12) - pd.Timestamp(year=2015, month=2, day=19, hour=9)" + ] + }, + { + "cell_type": "code", + "execution_count": 
95, + "id": "2534d141-1862-4901-ba70-7ed73ab9abdd", + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-29T21:14:45.478042Z", + "iopub.status.busy": "2024-08-29T21:14:45.477931Z", + "iopub.status.idle": "2024-08-29T21:14:45.480738Z", + "shell.execute_reply": "2024-08-29T21:14:45.480456Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('-31 days +03:09:02')" + ] + }, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } diff --git a/tests/unit/modin/modin/test_envvars.py b/tests/unit/modin/modin/test_envvars.py index 4f3540a63bf..7c5e3a40bb0 100644 --- a/tests/unit/modin/modin/test_envvars.py +++ b/tests/unit/modin/modin/test_envvars.py @@ -90,6 +90,32 @@ def test_custom_help(make_custom_envvar): assert "custom var" in make_custom_envvar.get_help() +def _init_doc_module(): + # Put the docs_module on the path + sys.path.append(f"{os.path.dirname(__file__)}") + # We use base.py from upstream modin, so we need to initialize its doc module + # However, since using the environment variable causes an importlib.reload call, + # we need to manually call _inherit_docstrings (https://github.com/modin-project/modin/issues/7138) + from .docs_module import classes + + # As a workaround for upstream modin bugs, we use our own _inherit_docstrings instead of the upstream + # function. We accordingly need to clear the docstring dictionary in testing because + # we manually called the annotation on initializing snowflake.snowpark.modin.pandas. + # snowflake.snowpark.modin.utils._attributes_with_docstrings_replaced.clear() + # TODO: once modin 0.31.0 is available, use the actual modin DocModule class + snowflake.snowpark.modin.utils._inherit_docstrings( + classes.BasePandasDataset, + overwrite_existing=True, + )(pd.base.BasePandasDataset) + DocModule.put("docs_module") + + +DOC_OVERRIDE_XFAIL_REASON = ( + "test docstring overrides currently cannot override real docstring overrides until " + "modin 0.31.0 is available" +) + + class TestDocModule: """ Test using a module to replace default docstrings. @@ -99,11 +125,9 @@ class TestDocModule: which we need to fix in upstream modin. """ + @pytest.mark.xfail(strict=True, reason=DOC_OVERRIDE_XFAIL_REASON) def test_overrides(self): - # Put the docs_module on the path - sys.path.append(f"{os.path.dirname(__file__)}") - DocModule.put("docs_module") - + _init_doc_module() # Test for override # TODO(https://github.com/modin-project/modin/issues/7134): Upstream # the BasePandasDataset tests to modin. @@ -144,11 +168,7 @@ def test_overrides(self): def test_not_redefining_classes_modin_issue_7138(self): original_dataframe_class = pd.DataFrame - - # Put the docs_module on the path - sys.path.append(f"{os.path.dirname(__file__)}") - DocModule.put("docs_module") - + _init_doc_module() # Test for override assert ( pd.DataFrame.apply.__doc__ @@ -157,22 +177,20 @@ def test_not_redefining_classes_modin_issue_7138(self): assert pd.DataFrame is original_dataframe_class + @pytest.mark.xfail(strict=True, reason=DOC_OVERRIDE_XFAIL_REASON) def test_base_docstring_override_with_no_dataframe_or_series_class_modin_issue_7113( self, ): # TODO(https://github.com/modin-project/modin/issues/7113): Upstream # this test case to Modin. This test case tests scenario 1 from issue 7113. - sys.path.append(f"{os.path.dirname(__file__)}") - DocModule.put("docs_module_with_just_base") + _init_doc_module() assert pd.base.BasePandasDataset.astype.__doc__ == ( "This is a test of the documentation module for BasePandasDataSet.astype." 
) + @pytest.mark.xfail(strict=True, reason=DOC_OVERRIDE_XFAIL_REASON) def test_base_property_not_overridden_in_either_subclass_modin_issue_7113(self): - # Put the docs_module on the path - sys.path.append(f"{os.path.dirname(__file__)}") - DocModule.put("docs_module") - + _init_doc_module() assert ( pd.base.BasePandasDataset.loc.__doc__ == "This is a test of the documentation module for BasePandasDataset.loc." diff --git a/tests/unit/modin/test_groupby_unsupported.py b/tests/unit/modin/test_groupby_unsupported.py index efc48724055..6bb27db446f 100644 --- a/tests/unit/modin/test_groupby_unsupported.py +++ b/tests/unit/modin/test_groupby_unsupported.py @@ -39,7 +39,6 @@ (lambda se: se.groupby("A").skew(), "skew"), (lambda se: se.groupby("A").take(2), "take"), (lambda se: se.groupby("A").expanding(), "expanding"), - (lambda se: se.groupby("A").value_counts(), "value_counts"), (lambda se: se.groupby("A").hist(), "hist"), (lambda se: se.groupby("A").plot(), "plot"), (lambda se: se.groupby("A").boxplot("test_group"), "boxplot"), @@ -83,7 +82,6 @@ def test_series_groupby_unsupported_methods_raises( (lambda df: df.groupby("A").skew(), "skew"), (lambda df: df.groupby("A").take(2), "take"), (lambda df: df.groupby("A").expanding(), "expanding"), - (lambda df: df.groupby("A").value_counts(), "value_counts"), (lambda df: df.groupby("A").hist(), "hist"), (lambda df: df.groupby("A").plot(), "plot"), (lambda df: df.groupby("A").boxplot("test_group"), "boxplot"), diff --git a/tests/unit/modin/test_join_utils.py b/tests/unit/modin/test_join_utils.py new file mode 100644 index 00000000000..031ab13bef9 --- /dev/null +++ b/tests/unit/modin/test_join_utils.py @@ -0,0 +1,97 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +from collections.abc import Hashable +from unittest import mock + +import pytest + +import snowflake.snowpark.modin.plugin # noqa: F401 +from snowflake.snowpark.modin.plugin._internal.frame import ( + OrderedDataFrame, + OrderingColumn, +) +from snowflake.snowpark.modin.plugin._internal.join_utils import ( + InheritJoinIndex, + JoinKeyCoalesceConfig, + _create_internal_frame_with_join_or_align_result, +) +from snowflake.snowpark.modin.plugin.compiler.snowflake_query_compiler import ( + InternalFrame, +) + + +def mock_internal_frame( + data_column_pandas_labels: list[Hashable], + data_column_pandas_index_names: list[Hashable], + data_column_snowflake_quoted_identifiers: list[str], + index_column_pandas_labels: list[Hashable], + index_column_snowflake_quoted_identifiers: list[str], +) -> InternalFrame: + ordered_dataframe = mock.create_autospec(OrderedDataFrame) + ordered_dataframe.projected_column_snowflake_quoted_identifiers = ( + data_column_snowflake_quoted_identifiers + + index_column_snowflake_quoted_identifiers + ) + ordered_dataframe.ordering_columns = [ + OrderingColumn(col) + for col in ordered_dataframe.projected_column_snowflake_quoted_identifiers + ] + internal_frame = InternalFrame.create( + ordered_dataframe=ordered_dataframe, + data_column_pandas_labels=data_column_pandas_labels, + data_column_pandas_index_names=data_column_pandas_index_names, + data_column_snowflake_quoted_identifiers=data_column_snowflake_quoted_identifiers, + index_column_pandas_labels=index_column_pandas_labels, + index_column_snowflake_quoted_identifiers=index_column_snowflake_quoted_identifiers, + data_column_types=[None] * len(data_column_pandas_labels), + index_column_types=[None] * len(index_column_pandas_labels), + ) + + return internal_frame + + +def 
test_create_internal_frame_with_result_using_invalid_methods(): + left_frame = mock_internal_frame( + data_column_pandas_labels=["a1", "b1"], + data_column_pandas_index_names=[None], + data_column_snowflake_quoted_identifiers=['"A1"', '"B1"'], + index_column_pandas_labels=["i1"], + index_column_snowflake_quoted_identifiers=['"I1"'], + ) + + right_frame = mock_internal_frame( + data_column_pandas_labels=["a2", "b2"], + data_column_pandas_index_names=[None], + data_column_snowflake_quoted_identifiers=['"A2"', '"B2"'], + index_column_pandas_labels=["i2"], + index_column_snowflake_quoted_identifiers=['"I2"'], + ) + + result_ordered_frame = mock.create_autospec(OrderedDataFrame) + result_ordered_frame.projected_column_snowflake_quoted_identifiers = [ + '"I1"', + '"A1"', + '"B1"', + '"I2"', + '"A2"', + '"B2"', + ] + result_ordered_frame._ordering_columns_tuple = [ + OrderingColumn('"I1"'), + OrderingColumn('"I2"'), + ] + + with pytest.raises(AssertionError, match="Unsupported join/align type invalid"): + _create_internal_frame_with_join_or_align_result( + result_ordered_frame=result_ordered_frame, + left=left_frame, + right=right_frame, + how="invalid", + left_on=['"I1"'], + right_on=['"I2"'], + sort=False, + key_coalesce_config=[JoinKeyCoalesceConfig.LEFT], + inherit_index=InheritJoinIndex.FROM_LEFT, + ) diff --git a/tests/unit/modin/test_unsupported.py b/tests/unit/modin/test_unsupported.py index 1e72dbd43ca..63a1cbc3bd3 100644 --- a/tests/unit/modin/test_unsupported.py +++ b/tests/unit/modin/test_unsupported.py @@ -45,7 +45,6 @@ def test_unsupported_io(io_method, kwargs): [ ["merge_ordered", {"left": "", "right": ""}], ["value_counts", {"values": ""}], - ["crosstab", {"index": "", "columns": ""}], ["lreshape", {"data": "", "groups": ""}], ["wide_to_long", {"df": "", "stubnames": "", "i": "", "j": ""}], ],