diff --git a/CHANGELOG.md b/CHANGELOG.md index c7ca1072bb8..ef0b6b91ac1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ #### Improvements +- Added support for `ln` in `snowflake.snowpark.functions` - Added support for specifying the following to `DataFrameWriter.save_as_table`: - `enable_schema_evolution` - `data_retention_time` @@ -51,7 +52,7 @@ #### New Features - Added limited support for the `Timedelta` type, including the following features. Snowpark pandas will raise `NotImplementedError` for unsupported `Timedelta` use cases. - - supporting tracking the Timedelta type through `copy`, `cache_result`, `shift`, `sort_index`. + - supporting tracking the Timedelta type through `copy`, `cache_result`, `shift`, `sort_index`, `assign`, `bfill`, `ffill`, `fillna`, `compare`, `diff`, `drop`, `dropna`, `duplicated`, `empty`, `equals`, `insert`, `isin`, `isna`, `items`, `iterrows`, `join`, `len`, `mask`, `melt`, `merge`, `nlargest`, `nsmallest`. - converting non-timedelta to timedelta via `astype`. - `NotImplementedError` will be raised for the rest of methods that do not support `Timedelta`. - support for subtracting two timestamps to get a Timedelta. 
diff --git a/docs/source/snowpark/functions.rst b/docs/source/snowpark/functions.rst index 9a381e5046a..877b884e7b4 100644 --- a/docs/source/snowpark/functions.rst +++ b/docs/source/snowpark/functions.rst @@ -196,6 +196,7 @@ Functions length listagg lit + ln locate log lower diff --git a/src/snowflake/snowpark/_internal/udf_utils.py b/src/snowflake/snowpark/_internal/udf_utils.py index 5a92dcb95cd..b79fcdcf9c9 100644 --- a/src/snowflake/snowpark/_internal/udf_utils.py +++ b/src/snowflake/snowpark/_internal/udf_utils.py @@ -1062,7 +1062,7 @@ def resolve_imports_and_packages( packages, include_pandas=is_pandas_udf, statement_params=statement_params, - )[0] + ) if packages is not None else session._resolve_packages( [], @@ -1070,7 +1070,7 @@ def resolve_imports_and_packages( validate_package=False, include_pandas=is_pandas_udf, statement_params=statement_params, - )[0] + ) ) if session is not None: diff --git a/src/snowflake/snowpark/functions.py b/src/snowflake/snowpark/functions.py index 58c2ab8518c..89b2d3aa336 100644 --- a/src/snowflake/snowpark/functions.py +++ b/src/snowflake/snowpark/functions.py @@ -5982,7 +5982,8 @@ def vector_inner_product(v1: ColumnOrName, v2: ColumnOrName) -> Column: def ln(c: ColumnOrLiteral) -> Column: - """Returns the natrual log product of given column expression + """Returns the natural logarithm of the given column expression. + Example:: >>> from snowflake.snowpark.functions import ln >>> from math import e diff --git a/src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py index a0ca357c59b..6d79de24ffb 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/binary_op_utils.py @@ -512,10 +512,8 @@ def are_equal_types(type1: DataType, type2: DataType) -> bool: Returns: True if given types are equal, False otherwise. 
""" - if isinstance(type1, TimedeltaType) and not isinstance(type2, TimedeltaType): - return False - if isinstance(type2, TimedeltaType) and not isinstance(type1, TimedeltaType): - return False + if isinstance(type1, TimedeltaType) or isinstance(type2, TimedeltaType): + return type1 == type2 if isinstance(type1, _IntegralType) and isinstance(type2, _IntegralType): return True if isinstance(type1, _FractionalType) and isinstance(type2, _FractionalType): diff --git a/src/snowflake/snowpark/modin/plugin/_internal/isin_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/isin_utils.py index 26d50a8d53c..48edba416c6 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/isin_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/isin_utils.py @@ -14,6 +14,9 @@ ) from snowflake.snowpark.modin.plugin._internal.frame import InternalFrame from snowflake.snowpark.modin.plugin._internal.indexing_utils import set_frame_2d_labels +from snowflake.snowpark.modin.plugin._internal.snowpark_pandas_types import ( + SnowparkPandasType, +) from snowflake.snowpark.modin.plugin._internal.type_utils import infer_series_type from snowflake.snowpark.modin.plugin._internal.utils import ( append_columns, @@ -100,6 +103,13 @@ def scalar_isin_expression( for literal_expr in values ] + # Case 4: If column's and values' data type differs and any of the type is SnowparkPandasType + elif values_dtype != column_dtype and ( + isinstance(values_dtype, SnowparkPandasType) + or isinstance(column_dtype, SnowparkPandasType) + ): + return pandas_lit(False) + values = array_construct(*values) # to_variant is a requirement for array_contains, else an error is produced. 
diff --git a/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py index 331901f1a67..846f3c64079 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/join_utils.py @@ -172,6 +172,30 @@ def join( JoinTypeLit ), f"Invalid join type: {how}. Allowed values are {get_args(JoinTypeLit)}" + def assert_snowpark_pandas_types_match() -> None: + """If Snowpark pandas types do not match, then a ValueError will be raised.""" + left_types = [ + left.snowflake_quoted_identifier_to_snowpark_pandas_type.get(id, None) + for id in left_on + ] + right_types = [ + right.snowflake_quoted_identifier_to_snowpark_pandas_type.get(id, None) + for id in right_on + ] + for i, (lt, rt) in enumerate(zip(left_types, right_types)): + if lt != rt: + left_on_id = left_on[i] + idx = left.data_column_snowflake_quoted_identifiers.index(left_on_id) + key = left.data_column_pandas_labels[idx] + lt = lt if lt is not None else left.get_snowflake_type(left_on_id) + rt = rt if rt is not None else right.get_snowflake_type(right_on[i]) + raise ValueError( + f"You are trying to merge on {type(lt).__name__} and {type(rt).__name__} columns for key '{key}'. " + f"If you wish to proceed you should use pd.concat" + ) + + assert_snowpark_pandas_types_match() + # Re-project the active columns to make sure all active columns of the internal frame participate # in the join operation, and unnecessary columns are dropped from the projected columns. 
left = left.select_active_columns() diff --git a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py index 20f5d8b61de..0efa51d0a38 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/snowpark_pandas_types.py @@ -12,13 +12,8 @@ import pandas as native_pd from snowflake.snowpark.column import Column -from snowflake.snowpark.modin.plugin.utils.warning_message import WarningMessage from snowflake.snowpark.types import DataType, LongType -TIMEDELTA_WARNING_MESSAGE = ( - "Snowpark pandas support for Timedelta is not currently available." -) - """Map Python type to its from_pandas method""" _python_type_to_from_pandas: dict[type, Callable[[Any], Any]] = {} @@ -101,6 +96,13 @@ def get_snowpark_pandas_type_for_pandas_type( return _type_to_snowpark_pandas_type[pandas_type]() return None + def type_match(self, value: Any) -> bool: + """Return True if the value's type matches self.""" + val_type = SnowparkPandasType.get_snowpark_pandas_type_for_pandas_type( + type(value) + ) + return self == val_type + class SnowparkPandasColumn(NamedTuple): """A Snowpark Column that has an optional SnowparkPandasType.""" @@ -128,11 +130,14 @@ class TimedeltaType(SnowparkPandasType, LongType): ) def __init__(self) -> None: - # TODO(SNOW-1620452): Remove this warning message before releasing - # Timedelta support. 
- WarningMessage.single_warning(TIMEDELTA_WARNING_MESSAGE) super().__init__() + def __eq__(self, other: Any) -> bool: + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other: Any) -> bool: + return not self.__eq__(other) + @staticmethod def to_pandas(value: int) -> native_pd.Timedelta: """ diff --git a/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py b/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py index 905f2b23c91..9f1ca22180a 100644 --- a/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py +++ b/src/snowflake/snowpark/modin/plugin/_internal/unpivot_utils.py @@ -735,12 +735,16 @@ def _simple_unpivot( # create the initial set of columns to be retained as identifiers and those # which will be unpivoted. Collect data type information. unpivot_quoted_columns = [] + unpivot_quoted_column_types = [] + ordering_decode_conditions = [] id_col_names = [] id_col_quoted_identifiers = [] - for (pandas_label, snowflake_quoted_identifier) in zip( + id_col_types = [] + for (pandas_label, snowflake_quoted_identifier, sp_pandas_type) in zip( frame.data_column_pandas_labels, frame.data_column_snowflake_quoted_identifiers, + frame.cached_data_column_snowpark_pandas_types, ): is_id_col = pandas_label in pandas_id_columns is_var_col = pandas_label in pandas_value_columns @@ -752,9 +756,11 @@ def _simple_unpivot( col(var_quoted) == pandas_lit(pandas_label) ) unpivot_quoted_columns.append(snowflake_quoted_identifier) + unpivot_quoted_column_types.append(sp_pandas_type) if is_id_col: id_col_names.append(pandas_label) id_col_quoted_identifiers.append(snowflake_quoted_identifier) + id_col_types.append(sp_pandas_type) # create the case expressions used for the final result set ordering based # on the column position. 
This clause will be appled after the unpivot @@ -787,7 +793,7 @@ def _simple_unpivot( pandas_labels=[unquoted_col_name], )[0] ) - # coalese the values to unpivot and preserve null values This code + # coalesce the values to unpivot and preserve null values This code # can be removed when UNPIVOT_INCLUDE_NULLS is enabled unpivot_columns_normalized_types.append( coalesce(to_variant(c), to_variant(pandas_lit(null_replace_value))).alias( @@ -870,6 +876,13 @@ def _simple_unpivot( var_quoted, corrected_value_column_name, ] + corrected_value_column_type = None + if len(set(unpivot_quoted_column_types)) == 1: + corrected_value_column_type = unpivot_quoted_column_types[0] + final_snowflake_quoted_col_types = id_col_types + [ + None, + corrected_value_column_type, + ] # Create the new frame and compiler return InternalFrame.create( @@ -881,8 +894,8 @@ def _simple_unpivot( index_column_snowflake_quoted_identifiers=[ ordered_dataframe.row_position_snowflake_quoted_identifier ], - data_column_types=None, - index_column_types=None, + data_column_types=final_snowflake_quoted_col_types, + index_column_types=[None], ) diff --git a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py index 7e6336c397e..e13c77f8ec3 100644 --- a/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py +++ b/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py @@ -1499,7 +1499,7 @@ def _shift_values_axis_0( row_position_quoted_identifier = frame.row_position_snowflake_quoted_identifier fill_value_dtype = infer_object_type(fill_value) - fill_value = pandas_lit(fill_value) if fill_value is not None else None + fill_value = None if pd.isna(fill_value) else pandas_lit(fill_value) def shift_expression_and_type( quoted_identifier: str, dtype: DataType @@ -5757,8 +5757,6 @@ def insert( Returns: A new SnowflakeQueryCompiler instance with new column. 
""" - self._raise_not_implemented_error_for_timedelta() - if not isinstance(value, SnowflakeQueryCompiler): # Scalar value new_internal_frame = self._modin_frame.append_column( @@ -5848,7 +5846,9 @@ def move_last_element(arr: list, index: int) -> None: data_column_snowflake_quoted_identifiers = ( new_internal_frame.data_column_snowflake_quoted_identifiers ) + data_column_types = new_internal_frame.cached_data_column_snowpark_pandas_types move_last_element(data_column_snowflake_quoted_identifiers, loc) + move_last_element(data_column_types, loc) new_internal_frame = InternalFrame.create( ordered_dataframe=new_internal_frame.ordered_dataframe, @@ -5857,8 +5857,8 @@ def move_last_element(arr: list, index: int) -> None: data_column_pandas_index_names=new_internal_frame.data_column_pandas_index_names, index_column_pandas_labels=new_internal_frame.index_column_pandas_labels, index_column_snowflake_quoted_identifiers=new_internal_frame.index_column_snowflake_quoted_identifiers, - data_column_types=None, - index_column_types=None, + data_column_types=data_column_types, + index_column_types=new_internal_frame.cached_index_column_snowpark_pandas_types, ) return SnowflakeQueryCompiler(new_internal_frame) @@ -6645,8 +6645,6 @@ def melt( Notes: melt does not yet handle multiindex or ignore index """ - self._raise_not_implemented_error_for_timedelta() - if col_level is not None: raise NotImplementedError( "Snowpark Pandas doesn't support 'col_level' argument in melt API" @@ -6749,8 +6747,6 @@ def merge( Returns: SnowflakeQueryCompiler instance with merged result. 
""" - self._raise_not_implemented_error_for_timedelta() - if validate: ErrorMessage.not_implemented( "Snowpark pandas merge API doesn't yet support 'validate' parameter" @@ -9815,6 +9811,10 @@ def _fillna_with_masking( # case 2: fillna with a method if method is not None: + # no Snowpark pandas type change in this case + data_column_snowpark_pandas_types = ( + self._modin_frame.cached_data_column_snowpark_pandas_types + ) method = FillNAMethod.get_enum_for_string_method(method) method_is_ffill = method is FillNAMethod.FFILL_METHOD if axis == 0: @@ -9921,6 +9921,7 @@ def fillna_expr(snowflake_quoted_id: str) -> SnowparkColumn: include_index=False, ) fillna_column_map = {} + data_column_snowpark_pandas_types = [] if columns_mask is not None: columns_to_ignore = itertools.compress( self._modin_frame.data_column_pandas_labels, @@ -9940,10 +9941,18 @@ def fillna_expr(snowflake_quoted_id: str) -> SnowparkColumn: col(id), coalesce(id, pandas_lit(val)), ) + col_type = self._modin_frame.get_snowflake_type(id) + col_pandas_type = ( + col_type + if isinstance(col_type, SnowparkPandasType) + and col_type.type_match(val) + else None + ) + data_column_snowpark_pandas_types.append(col_pandas_type) return SnowflakeQueryCompiler( self._modin_frame.update_snowflake_quoted_identifiers_with_expressions( - fillna_column_map + fillna_column_map, data_column_snowpark_pandas_types ).frame ) @@ -10217,7 +10226,8 @@ def diff(self, periods: int, axis: int) -> "SnowflakeQueryCompiler": } return SnowflakeQueryCompiler( self._modin_frame.update_snowflake_quoted_identifiers_with_expressions( - diff_label_to_value_map + diff_label_to_value_map, + self._modin_frame.cached_data_column_snowpark_pandas_types, ).frame ) diff --git a/src/snowflake/snowpark/session.py b/src/snowflake/snowpark/session.py index 5414d9a089d..b718364dc83 100644 --- a/src/snowflake/snowpark/session.py +++ b/src/snowflake/snowpark/session.py @@ -1117,11 +1117,10 @@ def add_packages( to ensure the consistent experience of a 
UDF between your local environment and the Snowflake server. """ - _, resolved_result_dict = self._resolve_packages( + self._resolve_packages( parse_positional_args_to_list(*packages), self._packages, ) - self._packages.update(resolved_result_dict) def remove_package(self, package: str) -> None: """ @@ -1482,12 +1481,13 @@ def _resolve_packages( validate_package: bool = True, include_pandas: bool = False, statement_params: Optional[Dict[str, str]] = None, - ) -> Tuple[List[str], Dict[str, str]]: + ) -> List[str]: """ Given a list of packages to add, this method will 1. Check if the packages are supported by Snowflake 2. Check if the package version if provided is supported by Snowflake 3. Check if the package is already added + 4. Update existing packages dictionary with the new packages (*this is required for python sp to work*) When auto package upload is enabled, this method will also try to upload the packages unavailable in Snowflake to the stage. @@ -1496,7 +1496,6 @@ def _resolve_packages( Returns: List[str]: List of package specifiers - Dict[str, str]: Dictionary of package name -> package specifier """ # Extract package names, whether they are local, and their associated Requirement objects package_dict = self._parse_packages(packages) @@ -1518,7 +1517,9 @@ def _resolve_packages( raise errors[0] elif len(errors) > 0: raise RuntimeError(errors) - return list(result_dict.values()), result_dict + + self._packages.update(result_dict) + return list(result_dict.values()) package_table = "information_schema.packages" if not self.get_current_database(): @@ -1531,7 +1532,7 @@ def _resolve_packages( # 'scikit-learn': 'scikit-learn==1.2.2', # 'python-dateutil': 'python-dateutil==2.8.2'} # Add to packages dictionary. Make a copy of existing packages - # dictionary to avoid modifying it. + # dictionary to avoid modifying it during intermediate steps. 
result_dict = ( existing_packages_dict.copy() if existing_packages_dict is not None else {} ) @@ -1567,10 +1568,10 @@ def _resolve_packages( if include_pandas: extra_modules.append("pandas") - return ( - list(result_dict.values()) - + self._get_req_identifiers_list(extra_modules, result_dict), - result_dict, + if existing_packages_dict is not None: + existing_packages_dict.update(result_dict) + return list(result_dict.values()) + self._get_req_identifiers_list( + extra_modules, result_dict ) def _upload_unsupported_packages( diff --git a/tests/integ/modin/data.py b/tests/integ/modin/data.py index 653e0037e09..35c4d321787 100644 --- a/tests/integ/modin/data.py +++ b/tests/integ/modin/data.py @@ -1,6 +1,7 @@ # # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. # +import pandas as native_pd RAW_NA_DF_DATA_TEST_CASES = [ ({"A": [1, 2, 3], "B": [4, 5, 6]}, "numeric-no"), @@ -16,9 +17,18 @@ ({"A": [True, 1, "X"], "B": ["Y", 3.14, False]}, "mixed"), ({"A": [True, None, "X"], "B": [None, 3.14, None]}, "mixed-mixed-1"), ({"A": [None, 1, None], "B": ["Y", None, False]}, "mixed-mixed-2"), + ( + { + "A": [None, native_pd.Timedelta(2), None], + "B": [native_pd.Timedelta(4), None, native_pd.Timedelta(6)], + }, + "timedelta-mixed-1", + ), ] RAW_NA_DF_SERIES_TEST_CASES = [ (list(df_data.values()), test_case) - for (df_data, test_case) in RAW_NA_DF_DATA_TEST_CASES + for (df_data, test_case) in RAW_NA_DF_DATA_TEST_CASES[ + :1 + ] # "timedelta-mixed-1" is not json serializable ] diff --git a/tests/integ/modin/frame/test_assign.py b/tests/integ/modin/frame/test_assign.py index b0da2a110bf..b1677deda8f 100644 --- a/tests/integ/modin/frame/test_assign.py +++ b/tests/integ/modin/frame/test_assign.py @@ -238,3 +238,24 @@ def test_overwrite_columns_via_assign(): eval_snowpark_pandas_result( snow_df, native_df, lambda df: df.assign(a=df["b"], last_col=[10, 11, 12]) ) + + +@sql_count_checker(query_count=2, join_count=1) +def test_assign_basic_timedelta_series(): + 
snow_df, native_df = create_test_dfs( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + columns=native_pd.Index(list("abc"), name="columns"), + index=native_pd.Index([0, 1, 2], name="index"), + ) + native_df.columns.names = ["columns"] + native_df.index.names = ["index"] + + native_td = native_pd.timedelta_range("1 day", periods=3) + + def assign_func(df): + if isinstance(df, pd.DataFrame): + return df.assign(new_col=pd.Series(native_td)) + else: + return df.assign(new_col=native_pd.Series(native_td)) + + eval_snowpark_pandas_result(snow_df, native_df, assign_func) diff --git a/tests/integ/modin/frame/test_bfill_ffill.py b/tests/integ/modin/frame/test_bfill_ffill.py index 7938fe4059f..504261b80fe 100644 --- a/tests/integ/modin/frame/test_bfill_ffill.py +++ b/tests/integ/modin/frame/test_bfill_ffill.py @@ -14,7 +14,7 @@ @pytest.mark.parametrize("func", ["backfill", "bfill", "ffill", "pad"]) @sql_count_checker(query_count=1) -def test_df_func(func): +def test_df_fill(func): native_df = native_pd.DataFrame( [ [np.nan, 2, np.nan, 0], @@ -31,3 +31,24 @@ def test_df_func(func): native_df, lambda df: getattr(df, func)(), ) + + +@pytest.mark.parametrize("func", ["backfill", "bfill", "ffill", "pad"]) +@sql_count_checker(query_count=1) +def test_df_timedelta_fill(func): + native_df = native_pd.DataFrame( + [ + [np.nan, 2, np.nan, 0], + [3, 4, np.nan, 1], + [np.nan, np.nan, np.nan, np.nan], + [np.nan, 3, np.nan, 4], + [3, np.nan, 4, np.nan], + ], + columns=list("ABCD"), + ).astype("timedelta64[ns]") + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: getattr(df, func)(), + ) diff --git a/tests/integ/modin/frame/test_compare.py b/tests/integ/modin/frame/test_compare.py index 9a0f7caf88d..c7f0c6f81d4 100644 --- a/tests/integ/modin/frame/test_compare.py +++ b/tests/integ/modin/frame/test_compare.py @@ -35,16 +35,10 @@ def base_df() -> native_pd.DataFrame: return native_pd.DataFrame( [ - [None, None, 3.1, pd.Timestamp("2024-01-01"), 
[130]], - [ - "a", - 1, - 4.2, - pd.Timestamp("2024-02-01"), - [131], - ], - ["b", 2, 5.3, pd.Timestamp("2024-03-01"), [132]], - [None, 3, 6.4, pd.Timestamp("2024-04-01"), [133]], + [None, None, 3.1, pd.Timestamp("2024-01-01"), [130], pd.Timedelta(1)], + ["a", 1, 4.2, pd.Timestamp("2024-02-01"), [131], pd.Timedelta(11)], + ["b", 2, 5.3, pd.Timestamp("2024-03-01"), [132], pd.Timedelta(21)], + [None, 3, 6.4, pd.Timestamp("2024-04-01"), [133], pd.Timedelta(13)], ], index=pd.MultiIndex.from_tuples( [ @@ -64,6 +58,7 @@ def base_df() -> native_pd.DataFrame: ("group_2", "float_col"), ("group_2", "timestamp_col"), ("group_2", "list_col"), + ("group_2", "timedelta_col"), ], names=["column_level1", "column_level2"], ), diff --git a/tests/integ/modin/frame/test_diff.py b/tests/integ/modin/frame/test_diff.py index 26aa5b74c85..185b2eab89e 100644 --- a/tests/integ/modin/frame/test_diff.py +++ b/tests/integ/modin/frame/test_diff.py @@ -140,6 +140,20 @@ def test_df_diff_bool_df(periods): eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.diff(periods=periods)) +@sql_count_checker(query_count=1) +@pytest.mark.parametrize("periods", [0, 1]) +def test_df_diff_timedelta_df(periods): + native_df = native_pd.DataFrame( + np.arange(NUM_ROWS_TALL_DF * NUM_COLS_TALL_DF).reshape( + (NUM_ROWS_TALL_DF, NUM_COLS_TALL_DF) + ), + columns=["A", "B", "C", "D"], + ) + native_df = native_df.astype({"A": "timedelta64[ns]", "C": "timedelta64[ns]"}) + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.diff(periods=periods)) + + @sql_count_checker(query_count=1) @pytest.mark.parametrize("periods", [0, 1]) def test_df_diff_int_and_bool_df(periods): diff --git a/tests/integ/modin/frame/test_drop.py b/tests/integ/modin/frame/test_drop.py index e71999dd28d..cc1a1a203d3 100644 --- a/tests/integ/modin/frame/test_drop.py +++ b/tests/integ/modin/frame/test_drop.py @@ -70,6 +70,18 @@ def test_drop_list_like(native_df, labels): 
eval_snowpark_pandas_result(snow_df, native_df, lambda df: df.drop(labels, axis=1)) +@pytest.mark.parametrize( + "labels", [Index(["red", "green"]), np.array(["red", "green"])] +) +@sql_count_checker(query_count=1) +def test_drop_timedelta(native_df, labels): + native_df_dt = native_df.astype({"red": "timedelta64[ns]"}) + snow_df = pd.DataFrame(native_df_dt) + eval_snowpark_pandas_result( + snow_df, native_df_dt, lambda df: df.drop(labels, axis=1) + ) + + @pytest.mark.parametrize( "labels, axis, expected_query_count", [ diff --git a/tests/integ/modin/frame/test_dropna.py b/tests/integ/modin/frame/test_dropna.py index e5fb2085417..d77c65d055e 100644 --- a/tests/integ/modin/frame/test_dropna.py +++ b/tests/integ/modin/frame/test_dropna.py @@ -19,6 +19,7 @@ def test_dropna_df(): "name": ["Alfred", "Batman", "Catwoman"], "toy": [np.nan, "Batmobile", "Bullwhip"], "born": [pd.NaT, pd.Timestamp("1940-04-25"), pd.NaT], + "dt": [pd.NaT, pd.Timedelta(1), pd.NaT], } ) diff --git a/tests/integ/modin/frame/test_duplicated.py b/tests/integ/modin/frame/test_duplicated.py index e4c5d594ecc..0eade6af114 100644 --- a/tests/integ/modin/frame/test_duplicated.py +++ b/tests/integ/modin/frame/test_duplicated.py @@ -53,11 +53,24 @@ def test_duplicated_with_misspelled_column_name_or_empty_subset(subset): (["A"], native_pd.Series([False, False, True, False, True])), (["B"], native_pd.Series([False, False, False, True, True])), (["A", "B"], native_pd.Series([False, False, False, False, True])), + ("C", native_pd.Series([False, False, True, False, True])), ], ) @sql_count_checker(query_count=1, join_count=1) def test_duplicated_subset(subset, expected): - df = pd.DataFrame({"A": [0, 1, 1, 2, 0], "B": ["a", "b", "c", "b", "a"]}) + df = pd.DataFrame( + { + "A": [0, 1, 1, 2, 0], + "B": ["a", "b", "c", "b", "a"], + "C": [ + pd.Timedelta(1), + pd.Timedelta(10), + pd.Timedelta(1), + pd.Timedelta(0), + pd.Timedelta(10), + ], + } + ) result = df.duplicated(subset=subset) 
assert_snowpark_pandas_equal_to_pandas(result, expected) diff --git a/tests/integ/modin/frame/test_empty.py b/tests/integ/modin/frame/test_empty.py index 0ed4d2c9fa9..b39a77eae91 100644 --- a/tests/integ/modin/frame/test_empty.py +++ b/tests/integ/modin/frame/test_empty.py @@ -16,7 +16,14 @@ @pytest.mark.parametrize( "dataframe_input, test_case_name", [ - ({"A": [1, 2, 3], "B": [4, 5, 6]}, "simple non-empty"), + ( + { + "A": [1, 2, 3], + "B": [4, 5, 6], + "C": native_pd.timedelta_range(1, periods=3), + }, + "simple non-empty", + ), ({"A": [], "B": []}, "empty column"), ({"A": [np.nan]}, "np nan column"), ], diff --git a/tests/integ/modin/frame/test_equals.py b/tests/integ/modin/frame/test_equals.py index 95b6b8ffd6f..2e2dc2fa129 100644 --- a/tests/integ/modin/frame/test_equals.py +++ b/tests/integ/modin/frame/test_equals.py @@ -25,6 +25,11 @@ ([1, 2, None], [1, 2, None], True), # nulls are considered equal ([1, 2, 3], [1.0, 2.0, 3.0], False), # float and integer types are not equal ([1, 2, 3], ["1", "2", "3"], False), # integer and string types are not equal + ( + [1, 2, 3], + pandas.timedelta_range(1, periods=3), + False, # timedelta and integer types are not equal + ), ], ) @sql_count_checker(query_count=2, join_count=2) diff --git a/tests/integ/modin/frame/test_fillna.py b/tests/integ/modin/frame/test_fillna.py index 189e757c8b2..677c8d3ddc5 100644 --- a/tests/integ/modin/frame/test_fillna.py +++ b/tests/integ/modin/frame/test_fillna.py @@ -150,6 +150,23 @@ def test_value_scalar(test_fillna_df): ) + +@sql_count_checker(query_count=2) +def test_timedelta_value_scalar(test_fillna_df): + timedelta_df = test_fillna_df.astype("timedelta64[ns]") + eval_snowpark_pandas_result( + pd.DataFrame(timedelta_df), + timedelta_df, + lambda df: df.fillna(pd.Timedelta(1)), # dtype remains timedelta64[ns] + ) + + # Snowpark pandas dtype will be changed to int in this case + eval_snowpark_pandas_result( + pd.DataFrame(timedelta_df), + test_fillna_df, + lambda df: df.fillna(1), + 
) + + @sql_count_checker(query_count=1) def test_value_scalar_none_index(test_fillna_df_none_index): # note: none in index should not be filled diff --git a/tests/integ/modin/frame/test_idxmax_idxmin.py b/tests/integ/modin/frame/test_idxmax_idxmin.py index f5a8a6d4b85..72fe88968bc 100644 --- a/tests/integ/modin/frame/test_idxmax_idxmin.py +++ b/tests/integ/modin/frame/test_idxmax_idxmin.py @@ -194,6 +194,26 @@ def test_idxmax_idxmin_with_dates(func, axis): ) +@sql_count_checker(query_count=1) +@pytest.mark.parametrize("func", ["idxmax", "idxmin"]) +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.xfail(reason="SNOW-1625380 TODO") +def test_idxmax_idxmin_with_timedelta(func, axis): + native_df = native_pd.DataFrame( + data={ + "date_1": native_pd.timedelta_range(1, periods=3), + "date_2": [pd.Timedelta(1), pd.Timedelta(-1), pd.Timedelta(0)], + }, + index=[10, 17, 12], + ) + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: getattr(df, func)(axis=axis), + ) + + @sql_count_checker(query_count=1) @pytest.mark.parametrize("func", ["idxmax", "idxmin"]) @pytest.mark.parametrize("axis", [0, 1]) diff --git a/tests/integ/modin/frame/test_insert.py b/tests/integ/modin/frame/test_insert.py index 258d4d2e641..86f5bd8082c 100644 --- a/tests/integ/modin/frame/test_insert.py +++ b/tests/integ/modin/frame/test_insert.py @@ -1,6 +1,8 @@ # # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. 
# +import functools + import modin.pandas as pd import numpy as np import pandas as native_pd @@ -768,3 +770,32 @@ def insert_op(df): expected_res = native_df1.join(native_df2["bar"], how="left", sort=False) expected_res = expected_res[["bar", "foo"]] assert_frame_equal(snow_res, expected_res, check_dtype=False) + + +@sql_count_checker(query_count=4, join_count=6) +def test_insert_timedelta(): + native_df = native_pd.DataFrame({"col1": [1, 2], "col2": [3, 4]}) + snow_df = pd.DataFrame(native_df) + + def insert(column, vals, df): + if isinstance(df, pd.DataFrame) and isinstance(vals, native_pd.Series): + values = pd.Series(vals) + else: + values = vals + df.insert(1, column, values) + return df + + vals = native_pd.timedelta_range(1, periods=2) + eval_snowpark_pandas_result( + snow_df, native_df, functools.partial(insert, "td", vals) + ) + + vals = native_pd.Series(native_pd.timedelta_range(1, periods=2)) + eval_snowpark_pandas_result( + snow_df, native_df, functools.partial(insert, "td2", vals) + ) + + vals = native_pd.Series(native_pd.timedelta_range(1, periods=2), index=[0, 2]) + eval_snowpark_pandas_result( + snow_df, native_df, functools.partial(insert, "td3", vals) + ) diff --git a/tests/integ/modin/frame/test_isin.py b/tests/integ/modin/frame/test_isin.py index c0f0a3ce37b..cd560a5715a 100644 --- a/tests/integ/modin/frame/test_isin.py +++ b/tests/integ/modin/frame/test_isin.py @@ -248,3 +248,24 @@ def test_isin_dataframe_values_type_negative(): ): df = pd.DataFrame([1, 2, 3]) df.isin(values="abcdef") + + +@sql_count_checker(query_count=3) +@pytest.mark.parametrize( + "values", + [ + pytest.param([2, 3], id="integers"), + pytest.param([pd.Timedelta(2), pd.Timedelta(3)], id="timedeltas"), + ], +) +def test_isin_timedelta(values): + native_df = native_pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 2]}).astype( + {"b": "timedelta64[ns]"} + ) + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: 
_test_isin_with_snowflake_logic(df, values, query_count=1), + ) diff --git a/tests/integ/modin/frame/test_items.py b/tests/integ/modin/frame/test_items.py index d409a0f326a..9cbd4945ee6 100644 --- a/tests/integ/modin/frame/test_items.py +++ b/tests/integ/modin/frame/test_items.py @@ -51,6 +51,7 @@ def assert_items_results_equal(snow_result, pandas_result) -> None: ), native_pd.DataFrame(index=["a"]), native_pd.DataFrame(columns=["a"]), + native_pd.DataFrame({"ts": native_pd.timedelta_range(10, periods=10)}), ], ) def test_items(dataframe): diff --git a/tests/integ/modin/frame/test_iterrows.py b/tests/integ/modin/frame/test_iterrows.py index 700d1b4ec27..fc415b2daf5 100644 --- a/tests/integ/modin/frame/test_iterrows.py +++ b/tests/integ/modin/frame/test_iterrows.py @@ -53,6 +53,7 @@ def assert_iterators_equal(snowpark_iterator, native_iterator): ), # empty df native_pd.DataFrame([]), + native_pd.DataFrame({"ts": native_pd.timedelta_range(10, periods=4)}), ], ) def test_df_iterrows(native_df): diff --git a/tests/integ/modin/frame/test_itertuples.py b/tests/integ/modin/frame/test_itertuples.py index c3687a939c7..eed33f9e1a4 100644 --- a/tests/integ/modin/frame/test_itertuples.py +++ b/tests/integ/modin/frame/test_itertuples.py @@ -37,6 +37,7 @@ native_pd.DataFrame([[1, 1.5], [2, 2.5], [3, 7.8]], columns=["i nt", "flo at"]), # empty df native_pd.DataFrame([]), + native_pd.DataFrame({"ts": native_pd.timedelta_range(10, periods=10)}), ] diff --git a/tests/integ/modin/frame/test_join.py b/tests/integ/modin/frame/test_join.py index 91500189d12..964b6f5426b 100644 --- a/tests/integ/modin/frame/test_join.py +++ b/tests/integ/modin/frame/test_join.py @@ -259,3 +259,23 @@ def test_join_validate_negative(lvalues, rvalues, validate): msg = "Snowpark pandas merge API doesn't yet support 'validate' parameter" with pytest.raises(NotImplementedError, match=msg): left.join(right, validate=validate) + + +@sql_count_checker(query_count=6, join_count=2) +def test_join_timedelta(left, 
right): + right = right.astype("timedelta64[ns]") + eval_snowpark_pandas_result( + left, + left.to_pandas(), + lambda df: df.join( + right if isinstance(df, pd.DataFrame) else right.to_pandas() + ), + ) + left = left.astype("timedelta64[ns]") + eval_snowpark_pandas_result( + left, + left.to_pandas(), + lambda df: df.join( + right if isinstance(df, pd.DataFrame) else right.to_pandas() + ), + ) diff --git a/tests/integ/modin/frame/test_len.py b/tests/integ/modin/frame/test_len.py index 1adeec50caa..d52df4bf567 100644 --- a/tests/integ/modin/frame/test_len.py +++ b/tests/integ/modin/frame/test_len.py @@ -16,6 +16,7 @@ ({"a": []}, 0), ({"a": [1, 2]}, 2), ({"a": [1, 2], "b": [1, 2], "c": [1, 2]}, 2), + ({"td": native_pd.timedelta_range(1, periods=20)}, 20), ], ) @sql_count_checker(query_count=1) diff --git a/tests/integ/modin/frame/test_mask.py b/tests/integ/modin/frame/test_mask.py index 684d8ba4342..e490f34e905 100644 --- a/tests/integ/modin/frame/test_mask.py +++ b/tests/integ/modin/frame/test_mask.py @@ -954,3 +954,12 @@ def perform_mask(df): native_df, perform_mask, ) + + +@sql_count_checker(query_count=1) +def test_mask_timedelta(test_data): + native_df = native_pd.DataFrame(test_data, dtype="timedelta64[ns]") + snow_df = pd.DataFrame(native_df) + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: df.mask(df > pd.Timedelta(1)) + ) diff --git a/tests/integ/modin/frame/test_melt.py b/tests/integ/modin/frame/test_melt.py index 68d25b1e482..0812bb2c60c 100644 --- a/tests/integ/modin/frame/test_melt.py +++ b/tests/integ/modin/frame/test_melt.py @@ -303,3 +303,19 @@ def test_everything(): value_name="dependent", ), ) + + +@sql_count_checker(query_count=1) +@pytest.mark.parametrize("value_vars", [["B"], ["B", "C"]]) +def test_melt_timedelta(value_vars): + native_df = npd.DataFrame( + { + "A": {0: "a", 1: "b", 2: "c"}, + "B": {0: 1, 1: 3, 2: 5}, + "C": {0: 2, 1: 4, 2: 6}, + } + ).astype({"B": "timedelta64[ns]", "C": "timedelta64[ns]"}) + snow_df = 
pd.DataFrame(native_df) + eval_snowpark_pandas_result( + snow_df, native_df, lambda df: df.melt(id_vars=["A"], value_vars=value_vars) + ) diff --git a/tests/integ/modin/frame/test_merge.py b/tests/integ/modin/frame/test_merge.py index 7ac88042e7f..c1ced99fc67 100644 --- a/tests/integ/modin/frame/test_merge.py +++ b/tests/integ/modin/frame/test_merge.py @@ -1156,3 +1156,62 @@ def test_merge_validate_negative(lvalues, rvalues, validate): msg = "Snowpark pandas merge API doesn't yet support 'validate' parameter" with pytest.raises(NotImplementedError, match=msg): left.merge(right, left_on="A", right_on="B", validate=validate) + + +@sql_count_checker(query_count=1, join_count=1) +def test_merge_timedelta_on(): + left_df = native_pd.DataFrame( + {"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]} + ).astype({"value": "timedelta64[ns]"}) + right_df = native_pd.DataFrame( + {"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]} + ).astype({"value": "timedelta64[ns]"}) + eval_snowpark_pandas_result( + pd.DataFrame(left_df), + left_df, + lambda df: df.merge( + pd.DataFrame(right_df) if isinstance(df, pd.DataFrame) else right_df, + left_on="lkey", + right_on="rkey", + ), + ) + + +@pytest.mark.parametrize( + "kwargs", + [ + {"how": "inner", "on": "a"}, + {"how": "right", "on": "a"}, + {"how": "right", "on": "b"}, + {"how": "left", "on": "c"}, + {"how": "cross"}, + ], +) +def test_merge_timedelta_how(kwargs): + left_df = native_pd.DataFrame( + {"a": ["foo", "bar"], "b": [1, 2], "c": [3, 5]} + ).astype({"b": "timedelta64[ns]"}) + right_df = native_pd.DataFrame( + {"a": ["foo", "baz"], "b": [1, 3], "c": [3, 4]} + ).astype({"b": "timedelta64[ns]", "c": "timedelta64[ns]"}) + count = 1 + expect_exception = False + if "c" == kwargs.get("on", None): # merge timedelta with int exception + expect_exception = True + count = 0 + + with SqlCounter(query_count=count, join_count=count): + eval_snowpark_pandas_result( + pd.DataFrame(left_df), + left_df, + lambda df: 
df.merge( + pd.DataFrame(right_df) if isinstance(df, pd.DataFrame) else right_df, + **kwargs, + ), + expect_exception=expect_exception, + expect_exception_match="You are trying to merge on LongType and TimedeltaType columns for key 'c'. If you " + "wish to proceed you should use pd.concat", + expect_exception_type=ValueError, + assert_exception_equal=False, # pandas exception: You are trying to merge on int64 and timedelta64[ns] + # columns for key 'c'. If you wish to proceed you should use pd.concat + ) diff --git a/tests/integ/modin/frame/test_nlargest_nsmallest.py b/tests/integ/modin/frame/test_nlargest_nsmallest.py index c32fb64a80e..c528c99d1b0 100644 --- a/tests/integ/modin/frame/test_nlargest_nsmallest.py +++ b/tests/integ/modin/frame/test_nlargest_nsmallest.py @@ -2,6 +2,7 @@ # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. # import modin.pandas as pd +import pandas as native_pd import pytest import snowflake.snowpark.modin.plugin # noqa: F401 @@ -124,3 +125,20 @@ def test_nlargest_nsmallest_non_numeric_types(method, data): n = 2 expected_df = snow_df.sort_values("A", ascending=(method == "nsmallest")).head(n) assert_frame_equal(getattr(snow_df, method)(n, "A"), expected_df) + + +@pytest.mark.parametrize("n", [1, 2, 4]) +@pytest.mark.parametrize("columns", ["A", "B", ["A", "B"], ["B", "A"]]) +@pytest.mark.parametrize("keep", ["first", "last"]) +@sql_count_checker(query_count=1) +def test_time_delta_nlargest_nsmallest(method, n, columns, keep): + native_df = native_pd.DataFrame( + {"A": [3, 2, 1, 4, 4], "B": [1, 2, 3, 4, 5]} + ).astype("timedelta64[ns]") + snow_df = pd.DataFrame(native_df) + + eval_snowpark_pandas_result( + snow_df, + native_df, + lambda df: getattr(df, method)(n, columns=columns, keep=keep), + ) diff --git a/tests/integ/modin/series/test_shift.py b/tests/integ/modin/series/test_shift.py index 7f27c4d313b..f5d4169026e 100644 --- a/tests/integ/modin/series/test_shift.py +++ b/tests/integ/modin/series/test_shift.py @@ 
-46,11 +46,7 @@ def test_series_with_values_shift(series, periods, fill_value): lambda s: s.shift( periods=periods, fill_value=pd.Timedelta(fill_value) - if isinstance( - s, native_pd.Series - ) # pandas does not support fill int to timedelta - and s.dtype == "timedelta64[ns]" - and fill_value is not no_default + if s.dtype == "timedelta64[ns]" and fill_value is not no_default else fill_value, ), ) diff --git a/tests/integ/modin/types/test_timedelta.py b/tests/integ/modin/types/test_timedelta.py index f0d8440009f..bcae016cbf0 100644 --- a/tests/integ/modin/types/test_timedelta.py +++ b/tests/integ/modin/types/test_timedelta.py @@ -2,15 +2,11 @@ # Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. # import datetime -import logging import modin.pandas as pd import pandas as native_pd import pytest -from snowflake.snowpark.modin.plugin._internal.snowpark_pandas_types import ( - TIMEDELTA_WARNING_MESSAGE, -) from tests.integ.modin.sql_counter import sql_count_checker from tests.integ.modin.utils import ( assert_series_equal, @@ -21,13 +17,11 @@ @sql_count_checker(query_count=1) -def test_create_timedelta_column_from_pandas_timedelta(caplog): +def test_create_timedelta_column_from_pandas_timedelta(): pandas_df = native_pd.DataFrame( {"timedelta_column": [native_pd.Timedelta(nanoseconds=1)], "int_column": [3]} ) - with caplog.at_level(logging.DEBUG): - snow_df = pd.DataFrame(pandas_df) - assert TIMEDELTA_WARNING_MESSAGE in caplog.text + snow_df = pd.DataFrame(pandas_df) eval_snowpark_pandas_result(snow_df, pandas_df, lambda df: df) diff --git a/tests/integ/test_packaging.py b/tests/integ/test_packaging.py index 3deac4e80f3..eaf99534e2b 100644 --- a/tests/integ/test_packaging.py +++ b/tests/integ/test_packaging.py @@ -262,7 +262,7 @@ def is_yaml_available() -> bool: # add module objects # but we can't register a udf with these versions # because the server might not have them - resolved_packages, _ = session._resolve_packages( + resolved_packages = 
session._resolve_packages( [numpy, pandas, dateutil], validate_package=False ) assert f"numpy=={numpy.__version__}" in resolved_packages diff --git a/tests/notebooks/modin/MIMICHealthcareDemo.ipynb b/tests/notebooks/modin/MIMICHealthcareDemo.ipynb index 3f1849e52cd..95a75e3c858 100644 --- a/tests/notebooks/modin/MIMICHealthcareDemo.ipynb +++ b/tests/notebooks/modin/MIMICHealthcareDemo.ipynb @@ -33,6 +33,12 @@ "execution_count": 1, "id": "90243e71-4cf0-4971-a95e-3f29e12449fc", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:35.536214Z", + "iopub.status.busy": "2024-08-28T17:27:35.535897Z", + "iopub.status.idle": "2024-08-28T17:27:36.977905Z", + "shell.execute_reply": "2024-08-28T17:27:36.977472Z" + }, "tags": [] }, "outputs": [], @@ -63,6 +69,12 @@ "execution_count": 2, "id": "c309356f-14f8-469a-9257-b944b8951410", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:36.980268Z", + "iopub.status.busy": "2024-08-28T17:27:36.980102Z", + "iopub.status.idle": "2024-08-28T17:27:45.691050Z", + "shell.execute_reply": "2024-08-28T17:27:45.690724Z" + }, "tags": [] }, "outputs": [], @@ -84,6 +96,12 @@ "execution_count": 3, "id": "68823bb5-fcd1-4f92-b767-e5ac83dc3df7", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:45.693385Z", + "iopub.status.busy": "2024-08-28T17:27:45.693251Z", + "iopub.status.idle": "2024-08-28T17:27:46.018818Z", + "shell.execute_reply": "2024-08-28T17:27:46.018231Z" + }, "tags": [] }, "outputs": [], @@ -126,6 +144,12 @@ "execution_count": 4, "id": "9a7fc3b9-50db-49da-a18a-8865a3356f31", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:46.022960Z", + "iopub.status.busy": "2024-08-28T17:27:46.022736Z", + "iopub.status.idle": "2024-08-28T17:27:49.916885Z", + "shell.execute_reply": "2024-08-28T17:27:49.916624Z" + }, "tags": [] }, "outputs": [ @@ -306,6 +330,12 @@ "execution_count": 5, "id": "7692a0af-de2f-42d1-9110-15ce104c2c5c", "metadata": { + "execution": { + 
"iopub.execute_input": "2024-08-28T17:27:49.918782Z", + "iopub.status.busy": "2024-08-28T17:27:49.918678Z", + "iopub.status.idle": "2024-08-28T17:27:50.561066Z", + "shell.execute_reply": "2024-08-28T17:27:50.560658Z" + }, "tags": [] }, "outputs": [ @@ -358,7 +388,14 @@ "cell_type": "code", "execution_count": 6, "id": "5344da61-915d-43cf-894a-484876450748", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:50.563582Z", + "iopub.status.busy": "2024-08-28T17:27:50.563395Z", + "iopub.status.idle": "2024-08-28T17:27:50.768782Z", + "shell.execute_reply": "2024-08-28T17:27:50.768309Z" + } + }, "outputs": [ { "name": "stderr", @@ -390,17 +427,15 @@ "execution_count": 7, "id": "5f72ca6b-ae9a-4a68-a391-83b065785004", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:50.770869Z", + "iopub.status.busy": "2024-08-28T17:27:50.770722Z", + "iopub.status.idle": "2024-08-28T17:27:50.888703Z", + "shell.execute_reply": "2024-08-28T17:27:50.888387Z" + }, "tags": [] }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:Snowpark pandas support for Timedelta is not currently available.\n" - ] - } - ], + "outputs": [], "source": [ "df[\"length_of_stay\"] = (df[\"outtime\"]-df[\"intime\"])/pd.Timedelta('1 hour')" ] @@ -409,7 +444,14 @@ "cell_type": "code", "execution_count": 8, "id": "ecc19928-1d3a-49b8-bc0d-4270e53bfc4c", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:50.890752Z", + "iopub.status.busy": "2024-08-28T17:27:50.890619Z", + "iopub.status.idle": "2024-08-28T17:27:51.188395Z", + "shell.execute_reply": "2024-08-28T17:27:51.188083Z" + } + }, "outputs": [], "source": [ "df[\"age\"] = df[\"intime\"].dt.year-df[\"dob\"].dt.year" @@ -428,6 +470,12 @@ "execution_count": 9, "id": "50c62f3f-a804-4efd-89bb-cf689a870055", "metadata": { + "execution": { + "iopub.execute_input": 
"2024-08-28T17:27:51.190704Z", + "iopub.status.busy": "2024-08-28T17:27:51.190570Z", + "iopub.status.idle": "2024-08-28T17:27:51.563926Z", + "shell.execute_reply": "2024-08-28T17:27:51.563299Z" + }, "tags": [] }, "outputs": [], @@ -450,6 +498,12 @@ "execution_count": 10, "id": "66ac1e04-4581-4292-8b7a-b88faa76edf5", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:51.567168Z", + "iopub.status.busy": "2024-08-28T17:27:51.566843Z", + "iopub.status.idle": "2024-08-28T17:27:52.325449Z", + "shell.execute_reply": "2024-08-28T17:27:52.325162Z" + }, "tags": [] }, "outputs": [ @@ -472,7 +526,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -514,6 +568,12 @@ "execution_count": 11, "id": "17b76fe7-4d6d-4eb4-bebe-55cc643b69f3", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:52.327478Z", + "iopub.status.busy": "2024-08-28T17:27:52.327310Z", + "iopub.status.idle": "2024-08-28T17:27:55.549227Z", + "shell.execute_reply": "2024-08-28T17:27:55.548770Z" + }, "tags": [] }, "outputs": [], @@ -534,6 +594,12 @@ "execution_count": 12, "id": "8514feca-f6b3-4186-bd32-ef07ba8efed4", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:55.552055Z", + "iopub.status.busy": "2024-08-28T17:27:55.551878Z", + "iopub.status.idle": "2024-08-28T17:27:55.941773Z", + "shell.execute_reply": "2024-08-28T17:27:55.941284Z" + }, "tags": [] }, "outputs": [], @@ -546,6 +612,12 @@ "execution_count": 13, "id": "bf8025c3-8657-41a7-8feb-6afab251ccfd", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:55.944225Z", + "iopub.status.busy": "2024-08-28T17:27:55.944051Z", + "iopub.status.idle": "2024-08-28T17:27:56.081283Z", + "shell.execute_reply": "2024-08-28T17:27:56.080891Z" + }, "tags": [] }, "outputs": [], @@ -569,6 +641,12 @@ "execution_count": 14, "id": "60ba61f7-fa60-4a6d-8b06-1282d2f64382", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:56.083562Z", + "iopub.status.busy": "2024-08-28T17:27:56.083425Z", + "iopub.status.idle": "2024-08-28T17:27:56.085148Z", + "shell.execute_reply": "2024-08-28T17:27:56.084867Z" + }, "tags": [] }, "outputs": [], @@ -582,6 +660,12 @@ "execution_count": 15, "id": "5cdeb9af-660a-4daa-98c5-f9e86699e9bd", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:56.086699Z", + "iopub.status.busy": "2024-08-28T17:27:56.086595Z", + "iopub.status.idle": "2024-08-28T17:27:57.259523Z", + "shell.execute_reply": "2024-08-28T17:27:57.259142Z" + }, "tags": [] }, "outputs": [ @@ -619,6 +703,12 @@ "execution_count": 16, "id": "2b704957-4b20-41a9-abbb-1d963a0ea0d2", "metadata": { + "execution": { + 
"iopub.execute_input": "2024-08-28T17:27:57.261881Z", + "iopub.status.busy": "2024-08-28T17:27:57.261730Z", + "iopub.status.idle": "2024-08-28T17:27:57.985080Z", + "shell.execute_reply": "2024-08-28T17:27:57.984756Z" + }, "tags": [] }, "outputs": [ @@ -663,6 +753,12 @@ "execution_count": 17, "id": "1748639f-04b5-45e6-b836-2433b66fa29d", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:57.986888Z", + "iopub.status.busy": "2024-08-28T17:27:57.986758Z", + "iopub.status.idle": "2024-08-28T17:27:59.498296Z", + "shell.execute_reply": "2024-08-28T17:27:59.498013Z" + }, "tags": [] }, "outputs": [ @@ -702,6 +798,12 @@ "execution_count": 18, "id": "24a34764-f442-4cc1-8b87-ed96ace34651", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:59.500159Z", + "iopub.status.busy": "2024-08-28T17:27:59.500025Z", + "iopub.status.idle": "2024-08-28T17:28:00.076867Z", + "shell.execute_reply": "2024-08-28T17:28:00.076522Z" + }, "tags": [] }, "outputs": [ @@ -731,6 +833,12 @@ "execution_count": 19, "id": "96753257-acd4-4ba9-b81b-19dc0a2af53c", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:00.079097Z", + "iopub.status.busy": "2024-08-28T17:28:00.078936Z", + "iopub.status.idle": "2024-08-28T17:28:00.081233Z", + "shell.execute_reply": "2024-08-28T17:28:00.080958Z" + }, "tags": [] }, "outputs": [ @@ -762,6 +870,12 @@ "execution_count": 20, "id": "2d26eee2-671a-4ff8-ac22-62612c1a1ced", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:00.083739Z", + "iopub.status.busy": "2024-08-28T17:28:00.083616Z", + "iopub.status.idle": "2024-08-28T17:28:00.944153Z", + "shell.execute_reply": "2024-08-28T17:28:00.943809Z" + }, "tags": [] }, "outputs": [], @@ -801,6 +915,12 @@ "execution_count": 21, "id": "21aef8ae-47d8-4c77-8e04-270304c41d4e", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:00.946550Z", + "iopub.status.busy": "2024-08-28T17:28:00.946409Z", + "iopub.status.idle": 
"2024-08-28T17:28:02.622587Z", + "shell.execute_reply": "2024-08-28T17:28:02.622199Z" + }, "tags": [] }, "outputs": [ @@ -837,6 +957,12 @@ "execution_count": 22, "id": "2d11b951-5b4c-4a98-ae4c-883fbccd56a7", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:02.624633Z", + "iopub.status.busy": "2024-08-28T17:28:02.624483Z", + "iopub.status.idle": "2024-08-28T17:28:02.933061Z", + "shell.execute_reply": "2024-08-28T17:28:02.932626Z" + }, "tags": [] }, "outputs": [], @@ -850,6 +976,12 @@ "execution_count": 23, "id": "35155531-c8ff-4ed1-9a3e-e457176f9f20", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:02.935312Z", + "iopub.status.busy": "2024-08-28T17:28:02.935204Z", + "iopub.status.idle": "2024-08-28T17:28:04.421197Z", + "shell.execute_reply": "2024-08-28T17:28:04.420876Z" + }, "tags": [] }, "outputs": [ @@ -1006,6 +1138,12 @@ "execution_count": 24, "id": "b8c41494-755a-485b-8119-9dfff98213df", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:04.423275Z", + "iopub.status.busy": "2024-08-28T17:28:04.423133Z", + "iopub.status.idle": "2024-08-28T17:28:05.150707Z", + "shell.execute_reply": "2024-08-28T17:28:05.150379Z" + }, "tags": [] }, "outputs": [ @@ -1053,6 +1191,12 @@ "id": "62d79c9b9a1e3fca", "metadata": { "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:28:05.152844Z", + "iopub.status.busy": "2024-08-28T17:28:05.152694Z", + "iopub.status.idle": "2024-08-28T17:28:07.249455Z", + "shell.execute_reply": "2024-08-28T17:28:07.248760Z" + }, "jupyter": { "outputs_hidden": false } @@ -1067,6 +1211,12 @@ "execution_count": 26, "id": "719049a4-0a5b-45da-bbd5-8ff073c95a93", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:07.253088Z", + "iopub.status.busy": "2024-08-28T17:28:07.252690Z", + "iopub.status.idle": "2024-08-28T17:28:07.972094Z", + "shell.execute_reply": "2024-08-28T17:28:07.971764Z" + }, "tags": [] }, "outputs": [], @@ -1089,6 +1239,12 @@ 
"execution_count": 27, "id": "9e1f2052-7405-496c-b4de-76e031978cb5", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:07.974120Z", + "iopub.status.busy": "2024-08-28T17:28:07.973931Z", + "iopub.status.idle": "2024-08-28T17:28:07.979210Z", + "shell.execute_reply": "2024-08-28T17:28:07.978966Z" + }, "tags": [] }, "outputs": [ @@ -1499,7 +1655,7 @@ " /* fitted */\n", " background-color: var(--sklearn-color-fitted-level-3);\n", "}\n", - "
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "GaussianNB()" @@ -1521,6 +1677,12 @@ "execution_count": 28, "id": "dcb50a0d-3f66-4376-a383-597789f83fa0", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:07.980853Z", + "iopub.status.busy": "2024-08-28T17:28:07.980748Z", + "iopub.status.idle": "2024-08-28T17:28:07.983038Z", + "shell.execute_reply": "2024-08-28T17:28:07.982709Z" + }, "tags": [] }, "outputs": [], @@ -1541,13 +1703,19 @@ "execution_count": 29, "id": "4993b18c-7d2a-49b6-96f5-b4a7c6a38cc2", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:07.984580Z", + "iopub.status.busy": "2024-08-28T17:28:07.984481Z", + "iopub.status.idle": "2024-08-28T17:28:08.048040Z", + "shell.execute_reply": "2024-08-28T17:28:08.047710Z" + }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, "execution_count": 29, @@ -1556,7 +1724,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1578,6 +1746,12 @@ "execution_count": 30, "id": "0c6fde6b-1126-4625-9c6e-7223eb97c30b", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:28:08.049898Z", + "iopub.status.busy": "2024-08-28T17:28:08.049780Z", + "iopub.status.idle": "2024-08-28T17:28:08.052183Z", + "shell.execute_reply": "2024-08-28T17:28:08.051914Z" + }, "tags": [] }, "outputs": [ @@ -1655,7 +1829,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/tests/notebooks/modin/SnowflakeChainTesting.ipynb b/tests/notebooks/modin/SnowflakeChainTesting.ipynb index 0d52f936e12..385437a72a7 100644 --- a/tests/notebooks/modin/SnowflakeChainTesting.ipynb +++ b/tests/notebooks/modin/SnowflakeChainTesting.ipynb @@ -8,17 +8,15 @@ "ExecuteTime": { "end_time": "2024-03-07T18:28:36.473541Z", "start_time": "2024-03-07T18:28:35.266455Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:12.334742Z", + "iopub.status.busy": "2024-08-28T17:52:12.334629Z", + "iopub.status.idle": "2024-08-28T17:52:13.600342Z", + "shell.execute_reply": "2024-08-28T17:52:13.599816Z" } }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "UserWarning: Snowpark pandas is currently in Private Preview. 
See https://docs.snowflake.com/LIMITEDACCESS/snowpark-pandas for details.\n" - ] - } - ], + "outputs": [], "source": [ "from pathlib import Path\n", "import sys\n", @@ -41,6 +39,12 @@ "ExecuteTime": { "end_time": "2024-03-07T18:28:41.717889Z", "start_time": "2024-03-07T18:28:36.475455Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:13.604060Z", + "iopub.status.busy": "2024-08-28T17:52:13.603731Z", + "iopub.status.idle": "2024-08-28T17:52:17.597094Z", + "shell.execute_reply": "2024-08-28T17:52:17.596200Z" } }, "outputs": [], @@ -56,6 +60,12 @@ "ExecuteTime": { "end_time": "2024-03-07T18:28:42.579467Z", "start_time": "2024-03-07T18:28:41.720668Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:17.601819Z", + "iopub.status.busy": "2024-08-28T17:52:17.601431Z", + "iopub.status.idle": "2024-08-28T17:52:18.520682Z", + "shell.execute_reply": "2024-08-28T17:52:18.520391Z" } }, "outputs": [ @@ -95,34 +105,346 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "id": "5d9f5d2b", "metadata": { "ExecuteTime": { "end_time": "2024-03-07T18:28:57.276309Z", "start_time": "2024-03-07T18:28:42.577226Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:18.523427Z", + "iopub.status.busy": "2024-08-28T17:52:18.523060Z", + "iopub.status.idle": "2024-08-28T17:52:18.941964Z", + "shell.execute_reply": "2024-08-28T17:52:18.941577Z" } }, "outputs": [ { - "ename": "NotImplementedError", - "evalue": "Snowpark pandas dropna API doesn't yet support axis == 1", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[9], line 8\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mchained_one\u001b[39m(df):\n\u001b[1;32m 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (df\n\u001b[1;32m 3\u001b[0m 
\u001b[38;5;241m.\u001b[39mdrop(columns\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPULOCATIONID\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDOLOCATIONID\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# dropna(axis=1)\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;241m.\u001b[39mdropna(axis\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcolumns\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 6\u001b[0m )\n\u001b[0;32m----> 8\u001b[0m df_one \u001b[38;5;241m=\u001b[39m \u001b[43mchained_one\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 9\u001b[0m df_one\n", - "Cell \u001b[0;32mIn[9], line 2\u001b[0m, in \u001b[0;36mchained_one\u001b[0;34m(df)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mchained_one\u001b[39m(df):\n\u001b[0;32m----> 2\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (\u001b[43mdf\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mPULOCATIONID\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mDOLOCATIONID\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;66;43;03m# dropna(axis=1)\u001b[39;49;00m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m(\u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mcolumns\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 6\u001b[0m )\n", - "File 
\u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py:382\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_method_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 380\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs: Any, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Callable:\n\u001b[0;32m--> 382\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 383\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 384\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 385\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 386\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 387\u001b[0m \u001b[43m \u001b[49m\u001b[43mproperty_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproperty_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 388\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py:316\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name)\u001b[0m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 308\u001b[0m 
\u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 309\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 310\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 311\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 314\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 315\u001b[0m )\n\u001b[0;32m--> 316\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[1;32m 318\u001b[0m \u001b[38;5;66;03m# Not inplace lazy APIs: add curr_api_call to the result\u001b[39;00m\n\u001b[1;32m 319\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_snowpark_pandas_dataframe_or_series_type(result):\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py:306\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, property_name)\u001b[0m\n\u001b[1;32m 300\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 301\u001b[0m \u001b[38;5;66;03m# query_history is a QueryHistory instance which is a Context Managers\u001b[39;00m\n\u001b[1;32m 302\u001b[0m \u001b[38;5;66;03m# See example in https://github.com/snowflakedb/snowpark-python/blob/main/src/snowflake/snowpark/session.py#L2052\u001b[39;00m\n\u001b[1;32m 303\u001b[0m \u001b[38;5;66;03m# Use `nullcontext` to handle `session` lacking `query_history` attribute without raising an exception.\u001b[39;00m\n\u001b[1;32m 304\u001b[0m \u001b[38;5;66;03m# This prevents telemetry from interfering with regular API calls.\u001b[39;00m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(session, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mquery_history\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)() \u001b[38;5;28;01mas\u001b[39;00m query_history:\n\u001b[0;32m--> 306\u001b[0m result \u001b[38;5;241m=\u001b[39m 
\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 308\u001b[0m \u001b[38;5;66;03m# Send Telemetry and Raise Error\u001b[39;00m\n\u001b[1;32m 309\u001b[0m _send_snowpark_pandas_telemetry_helper(\n\u001b[1;32m 310\u001b[0m session\u001b[38;5;241m=\u001b[39msession,\n\u001b[1;32m 311\u001b[0m telemetry_type\u001b[38;5;241m=\u001b[39merror_to_telemetry_type(e),\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 314\u001b[0m api_calls\u001b[38;5;241m=\u001b[39mexisting_api_calls \u001b[38;5;241m+\u001b[39m [curr_api_call],\n\u001b[1;32m 315\u001b[0m )\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/pandas/dataframe.py:362\u001b[0m, in \u001b[0;36mDataFrame.dropna\u001b[0;34m(self, axis, how, thresh, subset, inplace)\u001b[0m\n\u001b[1;32m 353\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdropna\u001b[39m(\n\u001b[1;32m 354\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 355\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 360\u001b[0m inplace: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 361\u001b[0m ): \u001b[38;5;66;03m# TODO: SNOW-1063346: Modin upgrade - modin.pandas.DataFrame functions\u001b[39;00m\n\u001b[0;32m--> 362\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_dropna\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 363\u001b[0m \u001b[43m 
\u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhow\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mthresh\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mthresh\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msubset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubset\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\n\u001b[1;32m 364\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/pandas/base.py:1481\u001b[0m, in \u001b[0;36mBasePandasDataset._dropna\u001b[0;34m(self, axis, how, thresh, subset, inplace)\u001b[0m\n\u001b[1;32m 1478\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m check\u001b[38;5;241m.\u001b[39many():\n\u001b[1;32m 1479\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;28mlist\u001b[39m(np\u001b[38;5;241m.\u001b[39mcompress(check, subset)))\n\u001b[0;32m-> 1481\u001b[0m new_query_compiler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_query_compiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1482\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1483\u001b[0m \u001b[43m \u001b[49m\u001b[43mhow\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhow\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1484\u001b[0m \u001b[43m \u001b[49m\u001b[43mthresh\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mthresh\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1485\u001b[0m \u001b[43m 
\u001b[49m\u001b[43msubset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1486\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1487\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_or_update_from_compiler(new_query_compiler, inplace)\n", - "File \u001b[0;32m~/anaconda3/envs/snowpark-modin-1.15/lib/python3.9/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:7957\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.dropna\u001b[0;34m(self, axis, how, thresh, subset)\u001b[0m\n\u001b[1;32m 7944\u001b[0m 
\u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 7945\u001b[0m \u001b[38;5;124;03mRemove missing values. If 'thresh' is specified then the 'how' parameter is ignored.\u001b[39;00m\n\u001b[1;32m 7946\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 7954\u001b[0m \u001b[38;5;124;03mNew QueryCompiler with null values dropped along given axis.\u001b[39;00m\n\u001b[1;32m 7955\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 7956\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m axis \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m-> 7957\u001b[0m \u001b[43mErrorMessage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnot_implemented\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7958\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mSnowpark pandas dropna API doesn\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mt yet support axis == 1\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\n\u001b[1;32m 7959\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 7961\u001b[0m \u001b[38;5;66;03m# reuse Snowpark Dataframe's dropna API and make sure to define subset correctly, i.e., only contain data\u001b[39;00m\n\u001b[1;32m 7962\u001b[0m \u001b[38;5;66;03m# columns\u001b[39;00m\n\u001b[1;32m 7963\u001b[0m subset_data_col_ids \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 7964\u001b[0m \u001b[38;5;28mid\u001b[39m\n\u001b[1;32m 7965\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m label, \u001b[38;5;28mid\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 7969\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m subset \u001b[38;5;129;01mor\u001b[39;00m label \u001b[38;5;129;01min\u001b[39;00m subset\n\u001b[1;32m 7970\u001b[0m ]\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/utils/error_message.py:151\u001b[0m, in 
\u001b[0;36mErrorMessage.not_implemented\u001b[0;34m(cls, message)\u001b[0m\n\u001b[1;32m 148\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 149\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mnot_implemented\u001b[39m(\u001b[38;5;28mcls\u001b[39m, message: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m NoReturn: \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 150\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNotImplementedError: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmessage\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 151\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(message)\n", - "\u001b[0;31mNotImplementedError\u001b[0m: Snowpark pandas dropna API doesn't yet support axis == 1" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VENDORIDTPEP_PICKUP_DATETIMETPEP_DROPOFF_DATETIMEPASSENGER_COUNTTRIP_DISTANCERATECODEIDSTORE_AND_FWD_FLAGPAYMENT_TYPEFARE_AMOUNTEXTRAMTA_TAXTIP_AMOUNTTOLLS_AMOUNTIMPROVEMENT_SURCHARGETOTAL_AMOUNTCONGESTION_SURCHARGEAIRPORT_FEE
022015-01-01 09:39:082015-01-01 09:59:49111.891N134.00.00.57.875.330.348.00NaNNaN
122015-01-01 04:01:362015-01-01 04:31:36112.581N138.50.50.57.800.000.347.60NaNNaN
222015-01-01 13:52:202015-01-01 13:58:5011.011N16.50.00.51.620.000.38.92NaNNaN
322015-01-01 12:43:332015-01-01 12:51:4552.271N19.50.00.51.900.000.312.20NaNNaN
422015-01-01 03:36:362015-01-01 03:44:4851.761N28.00.50.50.000.000.39.30NaNNaN
......................................................
1499522015-01-01 11:51:582015-01-01 11:54:3920.661N14.50.00.50.000.000.35.30NaNNaN
1499622015-01-01 08:52:402015-01-01 09:01:1421.891N28.50.00.50.000.000.39.30NaNNaN
1499712015-01-01 03:25:032015-01-01 03:44:5617.501N223.50.50.50.000.000.024.80NaNNaN
1499822015-01-01 03:03:532015-01-01 03:21:2727.361N122.50.50.54.600.000.328.40NaNNaN
1499912015-01-01 02:04:062015-01-01 02:14:5923.001N111.50.50.51.000.000.013.80NaNNaN
\n", + "

15000 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " VENDORID TPEP_PICKUP_DATETIME TPEP_DROPOFF_DATETIME PASSENGER_COUNT \\\n", + "0 2 2015-01-01 09:39:08 2015-01-01 09:59:49 1 \n", + "1 2 2015-01-01 04:01:36 2015-01-01 04:31:36 1 \n", + "2 2 2015-01-01 13:52:20 2015-01-01 13:58:50 1 \n", + "3 2 2015-01-01 12:43:33 2015-01-01 12:51:45 5 \n", + "4 2 2015-01-01 03:36:36 2015-01-01 03:44:48 5 \n", + "... ... ... ... ... \n", + "14995 2 2015-01-01 11:51:58 2015-01-01 11:54:39 2 \n", + "14996 2 2015-01-01 08:52:40 2015-01-01 09:01:14 2 \n", + "14997 1 2015-01-01 03:25:03 2015-01-01 03:44:56 1 \n", + "14998 2 2015-01-01 03:03:53 2015-01-01 03:21:27 2 \n", + "14999 1 2015-01-01 02:04:06 2015-01-01 02:14:59 2 \n", + "\n", + " TRIP_DISTANCE RATECODEID STORE_AND_FWD_FLAG PAYMENT_TYPE \\\n", + "0 11.89 1 N 1 \n", + "1 12.58 1 N 1 \n", + "2 1.01 1 N 1 \n", + "3 2.27 1 N 1 \n", + "4 1.76 1 N 2 \n", + "... ... ... ... ... \n", + "14995 0.66 1 N 1 \n", + "14996 1.89 1 N 2 \n", + "14997 7.50 1 N 2 \n", + "14998 7.36 1 N 1 \n", + "14999 3.00 1 N 1 \n", + "\n", + " FARE_AMOUNT EXTRA MTA_TAX TIP_AMOUNT TOLLS_AMOUNT \\\n", + "0 34.0 0.0 0.5 7.87 5.33 \n", + "1 38.5 0.5 0.5 7.80 0.00 \n", + "2 6.5 0.0 0.5 1.62 0.00 \n", + "3 9.5 0.0 0.5 1.90 0.00 \n", + "4 8.0 0.5 0.5 0.00 0.00 \n", + "... ... ... ... ... ... \n", + "14995 4.5 0.0 0.5 0.00 0.00 \n", + "14996 8.5 0.0 0.5 0.00 0.00 \n", + "14997 23.5 0.5 0.5 0.00 0.00 \n", + "14998 22.5 0.5 0.5 4.60 0.00 \n", + "14999 11.5 0.5 0.5 1.00 0.00 \n", + "\n", + " IMPROVEMENT_SURCHARGE TOTAL_AMOUNT CONGESTION_SURCHARGE AIRPORT_FEE \n", + "0 0.3 48.00 NaN NaN \n", + "1 0.3 47.60 NaN NaN \n", + "2 0.3 8.92 NaN NaN \n", + "3 0.3 12.20 NaN NaN \n", + "4 0.3 9.30 NaN NaN \n", + "... ... ... ... ... 
\n", + "14995 0.3 5.30 NaN NaN \n", + "14996 0.3 9.30 NaN NaN \n", + "14997 0.0 24.80 NaN NaN \n", + "14998 0.3 28.40 NaN NaN \n", + "14999 0.0 13.80 NaN NaN \n", + "\n", + "[15000 rows x 17 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -139,15 +461,29 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "07955149", "metadata": { "ExecuteTime": { "end_time": "2024-03-07T18:28:57.678995Z", "start_time": "2024-03-07T18:28:57.276560Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:18.946285Z", + "iopub.status.busy": "2024-08-28T17:52:18.946120Z", + "iopub.status.idle": "2024-08-28T17:52:19.544569Z", + "shell.execute_reply": "2024-08-28T17:52:19.544031Z" } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation has mismatches with pandas:\n", + "Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. 
In this case Snowflake's auto format may yield different result values compared to pandas..\n" + ] + }, { "name": "stderr", "output_type": "stream", @@ -204,7 +540,7 @@ " 1\n", " 0.24\n", " 1\n", - " False\n", + " N\n", " 2\n", " 3.0\n", " 0.5\n", @@ -213,8 +549,8 @@ " 0.0\n", " 0.3\n", " 4.30\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 822\n", @@ -224,7 +560,7 @@ " 1\n", " 0.80\n", " 1\n", - " False\n", + " N\n", " 2\n", " 7.5\n", " 0.5\n", @@ -233,8 +569,8 @@ " 0.0\n", " 0.0\n", " 8.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 10330\n", @@ -244,7 +580,7 @@ " 2\n", " 3.65\n", " 1\n", - " False\n", + " N\n", " 1\n", " 14.0\n", " 0.5\n", @@ -253,8 +589,8 @@ " 0.0\n", " 0.3\n", " 18.92\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 2282\n", @@ -264,7 +600,7 @@ " 2\n", " 1.10\n", " 1\n", - " False\n", + " N\n", " 1\n", " 7.5\n", " 0.5\n", @@ -273,8 +609,8 @@ " 0.0\n", " 0.0\n", " 9.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 11184\n", @@ -284,7 +620,7 @@ " 4\n", " 0.90\n", " 1\n", - " False\n", + " N\n", " 1\n", " 5.0\n", " 0.5\n", @@ -293,8 +629,8 @@ " 0.0\n", " 0.0\n", " 7.55\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " ...\n", @@ -324,7 +660,7 @@ " 1\n", " 1.50\n", " 1\n", - " False\n", + " N\n", " 2\n", " 8.0\n", " 0.0\n", @@ -333,8 +669,8 @@ " 0.0\n", " 0.0\n", " 8.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 331\n", @@ -344,7 +680,7 @@ " 1\n", " 1.10\n", " 1\n", - " False\n", + " N\n", " 1\n", " 6.5\n", " 0.0\n", @@ -353,8 +689,8 @@ " 0.0\n", " 0.0\n", " 8.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 10685\n", @@ -364,7 +700,7 @@ " 3\n", " 2.90\n", " 1\n", - " False\n", + " N\n", " 2\n", " 11.0\n", " 0.0\n", @@ -373,8 +709,8 @@ " 0.0\n", " 0.0\n", " 11.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 12300\n", @@ -384,7 +720,7 @@ " 2\n", " 3.38\n", " 
1\n", - " False\n", + " N\n", " 1\n", " 12.0\n", " 0.0\n", @@ -393,8 +729,8 @@ " 0.0\n", " 0.3\n", " 14.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 9211\n", @@ -404,7 +740,7 @@ " 1\n", " 1.40\n", " 1\n", - " False\n", + " N\n", " 2\n", " 6.5\n", " 0.0\n", @@ -413,8 +749,8 @@ " 0.0\n", " 0.0\n", " 7.30\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", @@ -435,18 +771,18 @@ "12300 2 2015-01-01 14:59:48 2015-01-01 15:09:37 2 \n", "9211 1 2015-01-01 14:59:55 2015-01-01 15:06:01 1 \n", "\n", - " TRIP_DISTANCE RATECODEID STORE_AND_FWD_FLAG PAYMENT_TYPE \\\n", - "10799 0.24 1 False 2 \n", - "822 0.80 1 False 2 \n", - "10330 3.65 1 False 1 \n", - "2282 1.10 1 False 1 \n", - "11184 0.90 1 False 1 \n", - "... ... ... ... ... \n", - "9483 1.50 1 False 2 \n", - "331 1.10 1 False 1 \n", - "10685 2.90 1 False 2 \n", - "12300 3.38 1 False 1 \n", - "9211 1.40 1 False 2 \n", + " TRIP_DISTANCE RATECODEID STORE_AND_FWD_FLAG PAYMENT_TYPE \\\n", + "10799 0.24 1 N 2 \n", + "822 0.80 1 N 2 \n", + "10330 3.65 1 N 1 \n", + "2282 1.10 1 N 1 \n", + "11184 0.90 1 N 1 \n", + "... ... ... ... ... \n", + "9483 1.50 1 N 2 \n", + "331 1.10 1 N 1 \n", + "10685 2.90 1 N 2 \n", + "12300 3.38 1 N 1 \n", + "9211 1.40 1 N 2 \n", "\n", " FARE_AMOUNT EXTRA MTA_TAX TIP_AMOUNT TOLLS_AMOUNT \\\n", "10799 3.0 0.5 0.5 0.00 0.0 \n", @@ -461,23 +797,23 @@ "12300 12.0 0.0 0.5 2.00 0.0 \n", "9211 6.5 0.0 0.5 0.00 0.0 \n", "\n", - " IMPROVEMENT_SURCHARGE TOTAL_AMOUNT CONGESTION_SURCHARGE AIRPORT_FEE \n", - "10799 0.3 4.30 None None \n", - "822 0.0 8.80 None None \n", - "10330 0.3 18.92 None None \n", - "2282 0.0 9.80 None None \n", - "11184 0.0 7.55 None None \n", - "... ... ... ... ... 
\n", - "9483 0.0 8.80 None None \n", - "331 0.0 8.80 None None \n", - "10685 0.0 11.80 None None \n", - "12300 0.3 14.80 None None \n", - "9211 0.0 7.30 None None \n", + " IMPROVEMENT_SURCHARGE TOTAL_AMOUNT CONGESTION_SURCHARGE AIRPORT_FEE \n", + "10799 0.3 4.30 NaN NaN \n", + "822 0.0 8.80 NaN NaN \n", + "10330 0.3 18.92 NaN NaN \n", + "2282 0.0 9.80 NaN NaN \n", + "11184 0.0 7.55 NaN NaN \n", + "... ... ... ... ... \n", + "9483 0.0 8.80 NaN NaN \n", + "331 0.0 8.80 NaN NaN \n", + "10685 0.0 11.80 NaN NaN \n", + "12300 0.3 14.80 NaN NaN \n", + "9211 0.0 7.30 NaN NaN \n", "\n", "[15000 rows x 17 columns]" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -490,12 +826,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "fe08e4c0", "metadata": { "ExecuteTime": { "end_time": "2024-03-07T18:28:57.906565Z", "start_time": "2024-03-07T18:28:57.681687Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:19.551182Z", + "iopub.status.busy": "2024-08-28T17:52:19.550955Z", + "iopub.status.idle": "2024-08-28T17:52:20.055900Z", + "shell.execute_reply": "2024-08-28T17:52:20.055329Z" } }, "outputs": [ @@ -555,7 +897,7 @@ " 1\n", " 0.24\n", " 1\n", - " False\n", + " N\n", " 2\n", " 3.0\n", " 0.5\n", @@ -564,8 +906,8 @@ " 0.0\n", " 0.3\n", " 4.30\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 822\n", @@ -575,7 +917,7 @@ " 1\n", " 0.80\n", " 1\n", - " False\n", + " N\n", " 2\n", " 7.5\n", " 0.5\n", @@ -584,8 +926,8 @@ " 0.0\n", " 0.0\n", " 8.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 10330\n", @@ -595,7 +937,7 @@ " 2\n", " 3.65\n", " 1\n", - " False\n", + " N\n", " 1\n", " 14.0\n", " 0.5\n", @@ -604,8 +946,8 @@ " 0.0\n", " 0.3\n", " 18.92\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 2282\n", @@ -615,7 +957,7 @@ " 2\n", " 1.10\n", " 1\n", - " False\n", + " N\n", " 1\n", " 7.5\n", " 0.5\n", @@ -624,8 +966,8 
@@ " 0.0\n", " 0.0\n", " 9.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 11184\n", @@ -635,7 +977,7 @@ " 4\n", " 0.90\n", " 1\n", - " False\n", + " N\n", " 1\n", " 5.0\n", " 0.5\n", @@ -644,8 +986,8 @@ " 0.0\n", " 0.0\n", " 7.55\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " ...\n", @@ -675,7 +1017,7 @@ " 1\n", " 1.50\n", " 1\n", - " False\n", + " N\n", " 2\n", " 8.0\n", " 0.0\n", @@ -684,8 +1026,8 @@ " 0.0\n", " 0.0\n", " 8.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 331\n", @@ -695,7 +1037,7 @@ " 1\n", " 1.10\n", " 1\n", - " False\n", + " N\n", " 1\n", " 6.5\n", " 0.0\n", @@ -704,8 +1046,8 @@ " 0.0\n", " 0.0\n", " 8.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 10685\n", @@ -715,7 +1057,7 @@ " 3\n", " 2.90\n", " 1\n", - " False\n", + " N\n", " 2\n", " 11.0\n", " 0.0\n", @@ -724,8 +1066,8 @@ " 0.0\n", " 0.0\n", " 11.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 12300\n", @@ -735,7 +1077,7 @@ " 2\n", " 3.38\n", " 1\n", - " False\n", + " N\n", " 1\n", " 12.0\n", " 0.0\n", @@ -744,8 +1086,8 @@ " 0.0\n", " 0.3\n", " 14.80\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", " 9211\n", @@ -755,7 +1097,7 @@ " 1\n", " 1.40\n", " 1\n", - " False\n", + " N\n", " 2\n", " 6.5\n", " 0.0\n", @@ -764,8 +1106,8 @@ " 0.0\n", " 0.0\n", " 7.30\n", - " None\n", - " None\n", + " NaN\n", + " NaN\n", " \n", " \n", "\n", @@ -786,18 +1128,18 @@ "12300 2 2015-01-01 14:59:48 2015-01-01 15:09:37 2 \n", "9211 1 2015-01-01 14:59:55 2015-01-01 15:06:01 1 \n", "\n", - " TRIP_DISTANCE RATECODEID STORE_AND_FWD_FLAG PAYMENT_TYPE \\\n", - "10799 0.24 1 False 2 \n", - "822 0.80 1 False 2 \n", - "10330 3.65 1 False 1 \n", - "2282 1.10 1 False 1 \n", - "11184 0.90 1 False 1 \n", - "... ... ... ... ... 
\n", - "9483 1.50 1 False 2 \n", - "331 1.10 1 False 1 \n", - "10685 2.90 1 False 2 \n", - "12300 3.38 1 False 1 \n", - "9211 1.40 1 False 2 \n", + " TRIP_DISTANCE RATECODEID STORE_AND_FWD_FLAG PAYMENT_TYPE \\\n", + "10799 0.24 1 N 2 \n", + "822 0.80 1 N 2 \n", + "10330 3.65 1 N 1 \n", + "2282 1.10 1 N 1 \n", + "11184 0.90 1 N 1 \n", + "... ... ... ... ... \n", + "9483 1.50 1 N 2 \n", + "331 1.10 1 N 1 \n", + "10685 2.90 1 N 2 \n", + "12300 3.38 1 N 1 \n", + "9211 1.40 1 N 2 \n", "\n", " FARE_AMOUNT EXTRA MTA_TAX TIP_AMOUNT TOLLS_AMOUNT \\\n", "10799 3.0 0.5 0.5 0.00 0.0 \n", @@ -812,23 +1154,23 @@ "12300 12.0 0.0 0.5 2.00 0.0 \n", "9211 6.5 0.0 0.5 0.00 0.0 \n", "\n", - " IMPROVEMENT_SURCHARGE TOTAL_AMOUNT CONGESTION_SURCHARGE AIRPORT_FEE \n", - "10799 0.3 4.30 None None \n", - "822 0.0 8.80 None None \n", - "10330 0.3 18.92 None None \n", - "2282 0.0 9.80 None None \n", - "11184 0.0 7.55 None None \n", - "... ... ... ... ... \n", - "9483 0.0 8.80 None None \n", - "331 0.0 8.80 None None \n", - "10685 0.0 11.80 None None \n", - "12300 0.3 14.80 None None \n", - "9211 0.0 7.30 None None \n", + " IMPROVEMENT_SURCHARGE TOTAL_AMOUNT CONGESTION_SURCHARGE AIRPORT_FEE \n", + "10799 0.3 4.30 NaN NaN \n", + "822 0.0 8.80 NaN NaN \n", + "10330 0.3 18.92 NaN NaN \n", + "2282 0.0 9.80 NaN NaN \n", + "11184 0.0 7.55 NaN NaN \n", + "... ... ... ... ... 
\n", + "9483 0.0 8.80 NaN NaN \n", + "331 0.0 8.80 NaN NaN \n", + "10685 0.0 11.80 NaN NaN \n", + "12300 0.3 14.80 NaN NaN \n", + "9211 0.0 7.30 NaN NaN \n", "\n", "[15000 rows x 17 columns]" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -845,12 +1187,18 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "afef5a5c", "metadata": { "ExecuteTime": { "end_time": "2024-03-07T18:28:58.433517Z", "start_time": "2024-03-07T18:28:57.910033Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:20.062655Z", + "iopub.status.busy": "2024-08-28T17:52:20.062443Z", + "iopub.status.idle": "2024-08-28T17:52:20.696390Z", + "shell.execute_reply": "2024-08-28T17:52:20.695770Z" } }, "outputs": [ @@ -987,7 +1335,7 @@ "6 1.462722 0.186825 15.231608 " ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -1008,12 +1356,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "bf04ee9b", "metadata": { "ExecuteTime": { "end_time": "2024-03-07T18:28:58.441220Z", "start_time": "2024-03-07T18:28:58.432402Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:20.700371Z", + "iopub.status.busy": "2024-08-28T17:52:20.700144Z", + "iopub.status.idle": "2024-08-28T17:52:20.702635Z", + "shell.execute_reply": "2024-08-28T17:52:20.702103Z" } }, "outputs": [], @@ -1026,12 +1380,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "7ca17eab", "metadata": { "ExecuteTime": { "end_time": "2024-03-07T18:28:58.441415Z", "start_time": "2024-03-07T18:28:58.436132Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:20.705816Z", + "iopub.status.busy": "2024-08-28T17:52:20.705608Z", + "iopub.status.idle": "2024-08-28T17:52:20.707897Z", + "shell.execute_reply": "2024-08-28T17:52:20.707518Z" } }, "outputs": [], @@ -1042,12 +1402,18 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 10, "id": "44f84dc4", "metadata": { "ExecuteTime": { "end_time": "2024-03-07T18:28:58.442175Z", "start_time": "2024-03-07T18:28:58.439190Z" + }, + "execution": { + "iopub.execute_input": "2024-08-28T17:52:20.710120Z", + "iopub.status.busy": "2024-08-28T17:52:20.709950Z", + "iopub.status.idle": "2024-08-28T17:52:20.711901Z", + "shell.execute_reply": "2024-08-28T17:52:20.711557Z" } }, "outputs": [], @@ -1072,7 +1438,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/tests/notebooks/modin/SnowparkPandasAPIDemo.ipynb b/tests/notebooks/modin/SnowparkPandasAPIDemo.ipynb index 9ed6a5d4eb4..4a3506e0b79 100644 --- a/tests/notebooks/modin/SnowparkPandasAPIDemo.ipynb +++ b/tests/notebooks/modin/SnowparkPandasAPIDemo.ipynb @@ -18,16 +18,15 @@ "cell_type": "code", "execution_count": 1, "id": "d9388c10-9876-47a2-82a6-da35d120ff77", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "UserWarning: Snowpark pandas is currently in Private Preview. 
See https://docs.snowflake.com/LIMITEDACCESS/snowpark-pandas for details.\n" - ] + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:35:29.956189Z", + "iopub.status.busy": "2024-08-28T17:35:29.956006Z", + "iopub.status.idle": "2024-08-28T17:35:31.117161Z", + "shell.execute_reply": "2024-08-28T17:35:31.116614Z" } - ], + }, + "outputs": [], "source": [ "import modin.pandas as spd\n", "import snowflake.snowpark.modin.plugin\n", @@ -57,13 +56,27 @@ "cell_type": "code", "execution_count": 2, "id": "03298234-aabe-4548-99b1-bfdb609bdafb", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:35:31.121238Z", + "iopub.status.busy": "2024-08-28T17:35:31.120973Z", + "iopub.status.idle": "2024-08-28T17:35:37.029284Z", + "shell.execute_reply": "2024-08-28T17:35:37.028960Z" + } + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Data from source table/view 'FINANCIAL__ECONOMIC_ESSENTIALS.CYBERSYN.STOCK_PRICE_TIMESERIES' is being copied into a new temporary table 'SNOWPARK_TEMP_TABLE_515SJ7V1X4'. 
DataFrame creation might take some time.\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Took 9.748766167000001 seconds to read a table with 66061030 rows into Snowpark pandas!\n" + "Took 5.6064697500000005 seconds to read a table with 69641010 rows into Snowpark pandas!\n" ] } ], @@ -80,12 +93,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "b69ac2fd-c636-4bb5-a27d-58a5e4cbea7e", "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:35:37.031526Z", + "iopub.status.busy": "2024-08-28T17:35:37.031368Z", + "iopub.status.idle": "2024-08-28T17:42:00.344511Z", + "shell.execute_reply": "2024-08-28T17:42:00.344196Z" + }, "scrolled": true }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Native pandas took 383.31552045800004 seconds to read the data!\n" + ] + } + ], "source": [ "# Read data into a local native pandas df - recommended to kill this cell after waiting a few minutes!\n", "\n", @@ -124,9 +151,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 4, "id": "1a623bed-aed9-4cdb-a3c8-33e9e7da52af", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:00.346273Z", + "iopub.status.busy": "2024-08-28T17:42:00.346155Z", + "iopub.status.idle": "2024-08-28T17:42:02.046901Z", + "shell.execute_reply": "2024-08-28T17:42:02.046061Z" + } + }, "outputs": [ { "data": { @@ -162,58 +196,58 @@ " \n", " \n", " 0\n", - " INDF\n", - " ETF-Index Fund Shares\n", - " PSE\n", - " NYSE ARCA\n", + " LNG\n", + " Equity\n", + " NYS\n", + " NEW YORK STOCK EXCHANGE\n", " pre-market_open\n", " Pre-Market Open\n", - " 2024-05-08\n", - " 35.965\n", + " 2021-03-15\n", + " 75.15\n", " \n", " \n", " 1\n", - " ATLC\n", + " PNC\n", " Equity\n", - " NAS\n", - " NASDAQ CAPITAL MARKET\n", - " pre-market_open\n", - " Pre-Market Open\n", - " 2024-05-08\n", - " 27.670\n", + " NYS\n", + " NEW YORK STOCK EXCHANGE\n", 
+ " nasdaq_volume\n", + " Nasdaq Volume\n", + " 2018-07-19\n", + " 369981.00\n", " \n", " \n", " 2\n", - " AAPR\n", + " VNQI\n", " ETF-Index Fund Shares\n", - " BAT\n", - " BATS Z-EXCHANGE\n", - " pre-market_open\n", - " Pre-Market Open\n", - " 2024-05-08\n", - " 24.650\n", + " NAS\n", + " NASDAQ CAPITAL MARKET\n", + " all-day_high\n", + " All-Day High\n", + " 2022-02-02\n", + " 53.19\n", " \n", " \n", " 3\n", - " TEL\n", + " FLNG\n", " Equity\n", " NYS\n", " NEW YORK STOCK EXCHANGE\n", - " pre-market_open\n", - " Pre-Market Open\n", - " 2024-05-08\n", - " 142.600\n", + " nasdaq_volume\n", + " Nasdaq Volume\n", + " 2021-08-12\n", + " 1068.00\n", " \n", " \n", " 4\n", - " CYTH\n", + " AGX\n", " Equity\n", - " NAS\n", - " NASDAQ CAPITAL MARKET\n", - " pre-market_open\n", - " Pre-Market Open\n", - " 2024-05-08\n", - " 1.530\n", + " NYS\n", + " NEW YORK STOCK EXCHANGE\n", + " nasdaq_volume\n", + " Nasdaq Volume\n", + " 2021-04-29\n", + " 8047.00\n", " \n", " \n", "\n", @@ -221,28 +255,28 @@ ], "text/plain": [ " TICKER ASSET_CLASS PRIMARY_EXCHANGE_CODE \\\n", - "0 INDF ETF-Index Fund Shares PSE \n", - "1 ATLC Equity NAS \n", - "2 AAPR ETF-Index Fund Shares BAT \n", - "3 TEL Equity NYS \n", - "4 CYTH Equity NAS \n", + "0 LNG Equity NYS \n", + "1 PNC Equity NYS \n", + "2 VNQI ETF-Index Fund Shares NAS \n", + "3 FLNG Equity NYS \n", + "4 AGX Equity NYS \n", "\n", " PRIMARY_EXCHANGE_NAME VARIABLE VARIABLE_NAME DATE \\\n", - "0 NYSE ARCA pre-market_open Pre-Market Open 2024-05-08 \n", - "1 NASDAQ CAPITAL MARKET pre-market_open Pre-Market Open 2024-05-08 \n", - "2 BATS Z-EXCHANGE pre-market_open Pre-Market Open 2024-05-08 \n", - "3 NEW YORK STOCK EXCHANGE pre-market_open Pre-Market Open 2024-05-08 \n", - "4 NASDAQ CAPITAL MARKET pre-market_open Pre-Market Open 2024-05-08 \n", + "0 NEW YORK STOCK EXCHANGE pre-market_open Pre-Market Open 2021-03-15 \n", + "1 NEW YORK STOCK EXCHANGE nasdaq_volume Nasdaq Volume 2018-07-19 \n", + "2 NASDAQ CAPITAL MARKET all-day_high All-Day High 
2022-02-02 \n", + "3 NEW YORK STOCK EXCHANGE nasdaq_volume Nasdaq Volume 2021-08-12 \n", + "4 NEW YORK STOCK EXCHANGE nasdaq_volume Nasdaq Volume 2021-04-29 \n", "\n", - " VALUE \n", - "0 35.965 \n", - "1 27.670 \n", - "2 24.650 \n", - "3 142.600 \n", - "4 1.530 " + " VALUE \n", + "0 75.15 \n", + "1 369981.00 \n", + "2 53.19 \n", + "3 1068.00 \n", + "4 8047.00 " ] }, - "execution_count": 14, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -264,13 +298,20 @@ "cell_type": "code", "execution_count": 5, "id": "4218fceb-68f1-41be-8c08-3f6ad51424d5", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:02.052365Z", + "iopub.status.busy": "2024-08-28T17:42:02.052087Z", + "iopub.status.idle": "2024-08-28T17:42:03.213745Z", + "shell.execute_reply": "2024-08-28T17:42:03.213064Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Filtering for stocks belonging to the NYSE took 1.480584958999998 seconds in Snowpark pandas\n" + "Filtering for stocks belonging to the NYSE took 1.1570580000000064 seconds in Snowpark pandas\n" ] } ], @@ -297,13 +338,20 @@ "cell_type": "code", "execution_count": 6, "id": "5d456c29-7689-4599-bcd6-02c646ef8f58", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:03.218253Z", + "iopub.status.busy": "2024-08-28T17:42:03.217966Z", + "iopub.status.idle": "2024-08-28T17:42:04.219058Z", + "shell.execute_reply": "2024-08-28T17:42:04.218450Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Filtering for the Pre-Market Open price for the above stocks took 1.0095136250000678 seconds in Snowpark pandas\n" + "Filtering for the Pre-Market Open price for the above stocks took 0.9963804170000117 seconds in Snowpark pandas\n" ] } ], @@ -328,13 +376,20 @@ "cell_type": "code", "execution_count": 7, "id": "2f8f893a-c7dc-4e08-bace-3c93ada282cf", - "metadata": {}, + "metadata": { + "execution": { + 
"iopub.execute_input": "2024-08-28T17:42:04.221691Z", + "iopub.status.busy": "2024-08-28T17:42:04.221503Z", + "iopub.status.idle": "2024-08-28T17:42:08.104300Z", + "shell.execute_reply": "2024-08-28T17:42:08.103522Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Pivoting the DataFrame took 4.195186291000027 seconds in Snowpark pandas\n" + "Pivoting the DataFrame took 3.878563791999966 seconds in Snowpark pandas\n" ] } ], @@ -350,7 +405,14 @@ "cell_type": "code", "execution_count": 8, "id": "65c0b9d1-a3be-4d05-9481-f54628f3b793", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:08.108598Z", + "iopub.status.busy": "2024-08-28T17:42:08.108313Z", + "iopub.status.idle": "2024-08-28T17:42:11.589114Z", + "shell.execute_reply": "2024-08-28T17:42:11.588214Z" + } + }, "outputs": [ { "data": { @@ -476,13 +538,20 @@ "cell_type": "code", "execution_count": 9, "id": "5b06f23b-12dc-4387-bb87-bc4cbcff6a85", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:11.594268Z", + "iopub.status.busy": "2024-08-28T17:42:11.593913Z", + "iopub.status.idle": "2024-08-28T17:42:13.693659Z", + "shell.execute_reply": "2024-08-28T17:42:13.692959Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Resampling the DataFrame took 2.099826750000034 seconds in Snowpark pandas\n" + "Resampling the DataFrame took 2.095007124999995 seconds in Snowpark pandas\n" ] } ], @@ -498,7 +567,14 @@ "cell_type": "code", "execution_count": 10, "id": "8978f55a-c28a-4b7f-9f20-4a2952d2a857", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:13.698712Z", + "iopub.status.busy": "2024-08-28T17:42:13.698431Z", + "iopub.status.idle": "2024-08-28T17:42:14.012750Z", + "shell.execute_reply": "2024-08-28T17:42:14.011861Z" + } + }, "outputs": [ { "data": { @@ -529,6 +605,7 @@ "2023-10-24 121.47\n", "2024-01-23 131.54\n", "2024-04-23 
152.77\n", + "2024-07-23 156.60\n", "Freq: None, Name: All-Day Low, dtype: float64" ] }, @@ -553,13 +630,20 @@ "cell_type": "code", "execution_count": 11, "id": "fb467dd6-cc74-423f-b17b-46541f5bbff8", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:14.018887Z", + "iopub.status.busy": "2024-08-28T17:42:14.018476Z", + "iopub.status.idle": "2024-08-28T17:42:14.867704Z", + "shell.execute_reply": "2024-08-28T17:42:14.866925Z" + } + }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Diffing the resampled data took 0.6961409589999903 seconds in Snowpark pandas\n" + "Diffing the resampled data took 0.8439803329999904 seconds in Snowpark pandas\n" ] } ], @@ -575,7 +659,14 @@ "cell_type": "code", "execution_count": 12, "id": "866628d5-5bf9-4212-bba2-bf5e816a70e1", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:14.872878Z", + "iopub.status.busy": "2024-08-28T17:42:14.872544Z", + "iopub.status.idle": "2024-08-28T17:42:15.255755Z", + "shell.execute_reply": "2024-08-28T17:42:15.254822Z" + } + }, "outputs": [ { "data": { @@ -606,6 +697,7 @@ "2023-10-24 -0.07\n", "2024-01-23 10.07\n", "2024-04-23 21.23\n", + "2024-07-23 3.83\n", "Freq: None, Name: All-Day Low, dtype: float64" ] }, @@ -622,7 +714,14 @@ "cell_type": "code", "execution_count": 13, "id": "a7593697-feb5-40a7-9d6c-7c011ad35186", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:42:15.261056Z", + "iopub.status.busy": "2024-08-28T17:42:15.260483Z", + "iopub.status.idle": "2024-08-28T17:42:15.265793Z", + "shell.execute_reply": "2024-08-28T17:42:15.265216Z" + } + }, "outputs": [ { "data": { @@ -668,7 +767,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/tests/notebooks/modin/TimeSeriesTesting.ipynb b/tests/notebooks/modin/TimeSeriesTesting.ipynb index 
0460b46a370..b21dc046b66 100644 --- a/tests/notebooks/modin/TimeSeriesTesting.ipynb +++ b/tests/notebooks/modin/TimeSeriesTesting.ipynb @@ -10,9 +10,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 1, "id": "5ece8277-dc52-40f3-913f-1a3145df6bdc", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:35.535408Z", + "iopub.status.busy": "2024-08-28T17:27:35.535052Z", + "iopub.status.idle": "2024-08-28T17:27:36.951326Z", + "shell.execute_reply": "2024-08-28T17:27:36.950877Z" + } + }, "outputs": [], "source": [ "from pathlib import Path\n", @@ -30,9 +37,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "id": "c127fb50-c570-46fb-a074-6e8eb3ede058", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:36.953723Z", + "iopub.status.busy": "2024-08-28T17:27:36.953532Z", + "iopub.status.idle": "2024-08-28T17:27:36.955730Z", + "shell.execute_reply": "2024-08-28T17:27:36.955323Z" + } + }, "outputs": [], "source": [ "import datetime\n", @@ -49,17 +63,32 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "id": "8d5f4a0a-fe5c-4a94-94ba-f16d258f92a6", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:36.958090Z", + "iopub.status.busy": "2024-08-28T17:27:36.957962Z", + "iopub.status.idle": "2024-08-28T17:27:37.441073Z", + "shell.execute_reply": "2024-08-28T17:27:37.440761Z" + } + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "`to_datetime` implementation has mismatches with pandas:\n", + "Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. 
In this case Snowflake's auto format may yield different result values compared to pandas..\n" + ] + }, { "data": { "text/plain": [ "DatetimeIndex(['2018-01-01', '2018-01-01', '2018-01-01'], dtype='datetime64[ns]', freq=None)" ] }, - "execution_count": 15, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -81,31 +110,28 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 4, "id": "28d01637-1093-43ea-a791-bc167243530e", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:37.443821Z", + "iopub.status.busy": "2024-08-28T17:27:37.443686Z", + "iopub.status.idle": "2024-08-28T17:27:37.973506Z", + "shell.execute_reply": "2024-08-28T17:27:37.973040Z" + } + }, "outputs": [ { - "ename": "SnowparkSessionException", - "evalue": "(1409): More than one active session is detected. When you call function 'udf' or use decorator '@udf', you must specify the 'session' parameter if you created multiple sessions.Alternatively, you can use 'session.udf.register' to register UDFs", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mSnowparkSessionException\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[16], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m dti \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2018-01-01\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mh\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2\u001b[0m 
dti\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py:454\u001b[0m, in \u001b[0;36msnowpark_pandas_telemetry_standalone_function_decorator..wrap\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 447\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 448\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrap\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs): \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;66;03m# add a `type: ignore` for this function definition because the\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 452\u001b[0m \u001b[38;5;66;03m# hints in-line here. We'll fix up the type with a `cast` before\u001b[39;00m\n\u001b[1;32m 453\u001b[0m \u001b[38;5;66;03m# returning the function.\u001b[39;00m\n\u001b[0;32m--> 454\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_telemetry_helper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mis_standalone_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py:283\u001b[0m, in \u001b[0;36m_telemetry_helper\u001b[0;34m(func, args, kwargs, is_standalone_function, 
property_name, property_method_type)\u001b[0m\n\u001b[1;32m 281\u001b[0m session \u001b[38;5;241m=\u001b[39m snowflake\u001b[38;5;241m.\u001b[39msnowpark\u001b[38;5;241m.\u001b[39msession\u001b[38;5;241m.\u001b[39m_get_active_session()\n\u001b[1;32m 282\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m SnowparkSessionException:\n\u001b[0;32m--> 283\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_run_func_helper\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 284\u001b[0m class_prefix \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 285\u001b[0m func\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__module__\u001b[39m\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m)[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 286\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_standalone_function\n\u001b[1;32m 287\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m args[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[1;32m 288\u001b[0m )\n\u001b[1;32m 289\u001b[0m \u001b[38;5;66;03m# Else the decorated func is an instance method:\u001b[39;00m\n\u001b[1;32m 290\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py:182\u001b[0m, in \u001b[0;36m_run_func_helper\u001b[0;34m(func, args, kwargs)\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 179\u001b[0m \u001b[38;5;66;03m# Raise error caused by func, i.e. 
the api call\u001b[39;00m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;66;03m# while suppressing Telemetry caused exceptions like SnowparkSessionException from telemetry in the stack trace.\u001b[39;00m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;66;03m# This prevents from adding telemetry error messages to regular API calls error messages.\u001b[39;00m\n\u001b[0;32m--> 182\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/telemetry.py:177\u001b[0m, in \u001b[0;36m_run_func_helper\u001b[0;34m(func, args, kwargs)\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 166\u001b[0m \u001b[38;5;124;03mThe helper function that run func, suppressing the possible previous telemetry exception context.\u001b[39;00m\n\u001b[1;32m 167\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[38;5;124;03m The return value of the function\u001b[39;00m\n\u001b[1;32m 175\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 176\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 177\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 178\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 179\u001b[0m \u001b[38;5;66;03m# Raise error caused by func, i.e. 
the api call\u001b[39;00m\n\u001b[1;32m 180\u001b[0m \u001b[38;5;66;03m# while suppressing Telemetry caused exceptions like SnowparkSessionException from telemetry in the stack trace.\u001b[39;00m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;66;03m# This prevents from adding telemetry error messages to regular API calls error messages.\u001b[39;00m\n\u001b[1;32m 182\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m e \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/pandas/general.py:2159\u001b[0m, in \u001b[0;36mdate_range\u001b[0;34m(start, end, periods, freq, tz, normalize, name, inclusive, **kwargs)\u001b[0m\n\u001b[1;32m 2155\u001b[0m \u001b[38;5;66;03m# If a timezone is not explicitly given via `tz`, see if one can be inferred from the `start` and `end` endpoints.\u001b[39;00m\n\u001b[1;32m 2156\u001b[0m \u001b[38;5;66;03m# If more than one of these inputs provides a timezone, require that they all agree.\u001b[39;00m\n\u001b[1;32m 2157\u001b[0m tz \u001b[38;5;241m=\u001b[39m _infer_tz_from_endpoints(start, end, tz)\n\u001b[0;32m-> 2159\u001b[0m qc \u001b[38;5;241m=\u001b[39m \u001b[43mSnowflakeQueryCompiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_date_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2160\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2161\u001b[0m \u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2162\u001b[0m \u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2163\u001b[0m \u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfreq\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2164\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mtz\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtz\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2165\u001b[0m \u001b[43m \u001b[49m\u001b[43mleft_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mleft_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2166\u001b[0m \u001b[43m \u001b[49m\u001b[43mright_inclusive\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mright_inclusive\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 2167\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 2168\u001b[0m \u001b[38;5;66;03m# Set date range as index column.\u001b[39;00m\n\u001b[1;32m 2169\u001b[0m qc \u001b[38;5;241m=\u001b[39m qc\u001b[38;5;241m.\u001b[39mset_index_from_columns(qc\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mtolist(), include_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n", - "File \u001b[0;32m~/Desktop/snowpark-python/venv/lib/python3.10/site-packages/modin/logging/logger_decorator.py:125\u001b[0m, in \u001b[0;36menable_logging..decorator..run_and_log\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 111\u001b[0m \u001b[38;5;124;03mCompute function with logging if Modin logging is enabled.\u001b[39;00m\n\u001b[1;32m 112\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 122\u001b[0m \u001b[38;5;124;03mAny\u001b[39;00m\n\u001b[1;32m 123\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m LogMode\u001b[38;5;241m.\u001b[39mget() \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdisable\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m--> 125\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mobj\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m logger \u001b[38;5;241m=\u001b[39m get_logger()\n\u001b[1;32m 128\u001b[0m logger\u001b[38;5;241m.\u001b[39mlog(log_level, start_line)\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/compiler/snowflake_query_compiler.py:668\u001b[0m, in \u001b[0;36mSnowflakeQueryCompiler.from_date_range\u001b[0;34m(cls, start, end, periods, freq, tz, left_inclusive, right_inclusive)\u001b[0m\n\u001b[1;32m 665\u001b[0m end \u001b[38;5;241m=\u001b[39m end\u001b[38;5;241m.\u001b[39mtz_localize(\u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(freq, Tick):\n\u001b[1;32m 667\u001b[0m \u001b[38;5;66;03m# generate nanosecond values\u001b[39;00m\n\u001b[0;32m--> 668\u001b[0m ns_values \u001b[38;5;241m=\u001b[39m \u001b[43mgenerator_utils\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate_regular_range\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 669\u001b[0m \u001b[43m \u001b[49m\u001b[43mstart\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mperiods\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfreq\u001b[49m\n\u001b[1;32m 670\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 671\u001b[0m dt_values \u001b[38;5;241m=\u001b[39m ns_values\u001b[38;5;241m.\u001b[39mseries_to_datetime()\n\u001b[1;32m 672\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:71\u001b[0m, in \u001b[0;36mgenerate_regular_range\u001b[0;34m(start, end, periods, freq)\u001b[0m\n\u001b[1;32m 67\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 68\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m 
\u001b[38;5;167;01mValueError\u001b[39;00m( \u001b[38;5;66;03m# pragma: no cover\u001b[39;00m\n\u001b[1;32m 69\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mat least \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mstart\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m or \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mend\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m should be specified if a \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mperiod\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m is given.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 70\u001b[0m )\n\u001b[0;32m---> 71\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mgenerate_range\u001b[49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43me\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstride\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/modin/plugin/_internal/generator_utils.py:127\u001b[0m, in \u001b[0;36mgenerate_range\u001b[0;34m(start, end, step)\u001b[0m\n\u001b[1;32m 110\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mgenerate_range\u001b[39m(\n\u001b[1;32m 111\u001b[0m start: \u001b[38;5;28mint\u001b[39m,\n\u001b[1;32m 112\u001b[0m end: Optional[\u001b[38;5;28mint\u001b[39m],\n\u001b[1;32m 113\u001b[0m step: \u001b[38;5;28mint\u001b[39m,\n\u001b[1;32m 114\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msnowflake_query_compiler.SnowflakeQueryCompiler\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 115\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 116\u001b[0m \u001b[38;5;124;03m Use `session.range` to generate values in range and represent in a query compiler\u001b[39;00m\n\u001b[1;32m 117\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 124\u001b[0m \u001b[38;5;124;03m The query compiler containing int 
values\u001b[39;00m\n\u001b[1;32m 125\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m 126\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _create_qc_from_snowpark_dataframe(\n\u001b[0;32m--> 127\u001b[0m \u001b[43mget_active_session\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mrange(start, end, step)\n\u001b[1;32m 128\u001b[0m )\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/context.py:32\u001b[0m, in \u001b[0;36mget_active_session\u001b[0;34m()\u001b[0m\n\u001b[1;32m 24\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_active_session\u001b[39m() \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msnowflake.snowpark.Session\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 25\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns the current active Snowpark session.\u001b[39;00m\n\u001b[1;32m 26\u001b[0m \n\u001b[1;32m 27\u001b[0m \u001b[38;5;124;03m Raises: SnowparkSessionException: If there is more than one active session or no active sessions.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 30\u001b[0m \u001b[38;5;124;03m A :class:`Session` object for the current session.\u001b[39;00m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 32\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msnowflake\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msnowpark\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_active_session\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Desktop/snowpark-python/src/snowflake/snowpark/session.py:217\u001b[0m, in \u001b[0;36m_get_active_session\u001b[0;34m()\u001b[0m\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mnext\u001b[39m(\u001b[38;5;28miter\u001b[39m(_active_sessions))\n\u001b[1;32m 216\u001b[0m 
\u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(_active_sessions) \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SnowparkClientExceptionMessages\u001b[38;5;241m.\u001b[39mMORE_THAN_ONE_ACTIVE_SESSIONS()\n\u001b[1;32m 218\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 219\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m SnowparkClientExceptionMessages\u001b[38;5;241m.\u001b[39mSERVER_NO_DEFAULT_SESSION()\n", - "\u001b[0;31mSnowparkSessionException\u001b[0m: (1409): More than one active session is detected. When you call function 'udf' or use decorator '@udf', you must specify the 'session' parameter if you created multiple sessions.Alternatively, you can use 'session.udf.register' to register UDFs" - ] + "data": { + "text/plain": [ + "DatetimeIndex(['2018-01-01 00:00:00', '2018-01-01 01:00:00',\n", + " '2018-01-01 02:00:00'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -123,9 +149,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "1f2c79bc-2a9e-41f0-ab36-44fcc20d119a", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:37.976832Z", + "iopub.status.busy": "2024-08-28T17:27:37.976629Z", + "iopub.status.idle": "2024-08-28T17:27:37.978940Z", + "shell.execute_reply": "2024-08-28T17:27:37.978566Z" + } + }, "outputs": [], "source": [ "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", @@ -134,9 +167,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "d7916dfa-9716-47e4-92a8-c3c852a3d802", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:37.981235Z", + "iopub.status.busy": "2024-08-28T17:27:37.981056Z", + "iopub.status.idle": "2024-08-28T17:27:37.983005Z", + "shell.execute_reply": 
"2024-08-28T17:27:37.982681Z" + } + }, "outputs": [], "source": [ "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", @@ -153,10 +193,33 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "5aa8cd79-521b-42ee-a3a6-66be36603bcb", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:37.985074Z", + "iopub.status.busy": "2024-08-28T17:27:37.984931Z", + "iopub.status.idle": "2024-08-28T17:27:39.127895Z", + "shell.execute_reply": "2024-08-28T17:27:39.127293Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2018-01-01 00:00:00 0\n", + "2018-01-01 01:00:00 1\n", + "2018-01-01 02:00:00 2\n", + "2018-01-01 03:00:00 3\n", + "2018-01-01 04:00:00 4\n", + "Freq: None, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "idx = pd.date_range(\"2018-01-01\", periods=5, freq=\"h\")\n", "ts = pd.Series(range(len(idx)), index=idx)\n", @@ -165,10 +228,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "796c954c-7f60-441b-b85e-1098824fae4b", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:39.131539Z", + "iopub.status.busy": "2024-08-28T17:27:39.131194Z", + "iopub.status.idle": "2024-08-28T17:27:39.966782Z", + "shell.execute_reply": "2024-08-28T17:27:39.966293Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2018-01-01 00:00:00 0.5\n", + "2018-01-01 02:00:00 2.5\n", + "2018-01-01 04:00:00 4.0\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ts.resample(\"2h\").mean()" ] @@ -183,10 +267,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "e7272da8-eae8-4e31-8a61-2f442a6780e0", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + 
"iopub.execute_input": "2024-08-28T17:27:39.969410Z", + "iopub.status.busy": "2024-08-28T17:27:39.969229Z", + "iopub.status.idle": "2024-08-28T17:27:39.972019Z", + "shell.execute_reply": "2024-08-28T17:27:39.971689Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Friday'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "friday = pd.Timestamp(\"2018-01-05\")\n", "friday.day_name()" @@ -194,10 +296,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "b69cb16a-9fc7-46fe-a6f6-a6a1ce635dc5", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:39.974318Z", + "iopub.status.busy": "2024-08-28T17:27:39.974172Z", + "iopub.status.idle": "2024-08-28T17:27:39.976711Z", + "shell.execute_reply": "2024-08-28T17:27:39.976411Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Saturday'" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "saturday = friday + pd.Timedelta(\"1 day\")\n", "saturday.day_name()" @@ -205,10 +325,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "064f4271-9485-497d-b176-b39d4f75248c", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:39.979219Z", + "iopub.status.busy": "2024-08-28T17:27:39.979068Z", + "iopub.status.idle": "2024-08-28T17:27:39.981624Z", + "shell.execute_reply": "2024-08-28T17:27:39.981354Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'Monday'" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "monday = friday + pd.offsets.BDay()\n", "monday.day_name()" @@ -216,10 +354,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "86bf9469-b7d3-44fc-900e-cfd67a065842", - "metadata": {}, - "outputs": [], + 
"metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:39.983587Z", + "iopub.status.busy": "2024-08-28T17:27:39.983443Z", + "iopub.status.idle": "2024-08-28T17:27:40.913877Z", + "shell.execute_reply": "2024-08-28T17:27:40.913560Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2012-01-01 NaN\n", + "2012-01-02 0.0\n", + "2012-01-03 1.0\n", + "Freq: None, dtype: float64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "rng = pd.date_range(\"2012-01-01\", \"2012-01-03\")\n", "ts = pd.Series(range(len(rng)), index=rng)\n", @@ -245,10 +404,30 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "0e4e6063-a60f-4a70-adca-2cb9b3b101f8", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:40.916090Z", + "iopub.status.busy": "2024-08-28T17:27:40.915940Z", + "iopub.status.idle": "2024-08-28T17:27:41.359184Z", + "shell.execute_reply": "2024-08-28T17:27:41.358781Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "DatetimeIndex(['2012-10-08 18:15:05', '2012-10-09 18:15:05',\n", + " '2012-10-10 18:15:05', '2012-10-11 18:15:05'],\n", + " dtype='datetime64[ns]', freq=None)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "stamps = pd.date_range(\"2012-10-08 18:15:05\", periods=4, freq=\"D\")\n", "stamps" @@ -256,9 +435,16 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "f4a38e8b-abcb-49c6-839d-01e4215d7d7a", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:41.361646Z", + "iopub.status.busy": "2024-08-28T17:27:41.361457Z", + "iopub.status.idle": "2024-08-28T17:27:41.363485Z", + "shell.execute_reply": "2024-08-28T17:27:41.363091Z" + } + }, "outputs": [], "source": [ "# TODO SNOW-1635620: uncomment when TimeDelta is implemented\n", @@ -275,10 +461,28 
@@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "1febbd6a-1b57-4e6a-a48a-3eac565ad61d", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:41.365775Z", + "iopub.status.busy": "2024-08-28T17:27:41.365599Z", + "iopub.status.idle": "2024-08-28T17:27:41.413954Z", + "shell.execute_reply": "2024-08-28T17:27:41.413683Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2016-10-30 23:00:00+0200', tz='Europe/Helsinki')" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ts = pd.Timestamp(\"2016-10-30 00:00:00\", tz=\"Europe/Helsinki\")\n", "\n", @@ -287,10 +491,28 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "b8d03fb0-826f-4698-a6d0-f2b63f7d38dc", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:41.415634Z", + "iopub.status.busy": "2024-08-28T17:27:41.415523Z", + "iopub.status.idle": "2024-08-28T17:27:41.417793Z", + "shell.execute_reply": "2024-08-28T17:27:41.417561Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timestamp('2016-10-31 00:00:00+0200', tz='Europe/Helsinki')" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "ts + pd.DateOffset(days=1)" ] @@ -305,40 +527,112 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "dd818a8d-97c1-46f3-b29f-499ba92f22ae", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:41.419736Z", + "iopub.status.busy": "2024-08-28T17:27:41.419620Z", + "iopub.status.idle": "2024-08-28T17:27:42.097520Z", + "shell.execute_reply": "2024-08-28T17:27:42.097100Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('396 days 03:00:00')" + ] + }, + "execution_count": 17, + "metadata": {}, + 
"output_type": "execute_result" + } + ], "source": [ "pd.to_datetime('2018-10-26 12:00:00') - pd.to_datetime('2017-09-25 09:00:00')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "7c9a87d2-7883-46a6-8433-dfa5900ca9b0", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:42.099835Z", + "iopub.status.busy": "2024-08-28T17:27:42.099657Z", + "iopub.status.idle": "2024-08-28T17:27:42.102502Z", + "shell.execute_reply": "2024-08-28T17:27:42.102144Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('6 days 07:00:00')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.Timestamp(\"2014-08-01 10:00\") - pd.Timestamp(\"2014-07-26 03:00\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "e78454b1-0d4c-42bc-a127-b21a4a7f09cf", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:42.104922Z", + "iopub.status.busy": "2024-08-28T17:27:42.104781Z", + "iopub.status.idle": "2024-08-28T17:27:42.107600Z", + "shell.execute_reply": "2024-08-28T17:27:42.107293Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Timedelta('682 days 03:00:00')" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pd.Timestamp(year=2017, month=1, day=1, hour=12) - pd.Timestamp(year=2015, month=2, day=19, hour=9)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "2534d141-1862-4901-ba70-7ed73ab9abdd", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2024-08-28T17:27:42.109761Z", + "iopub.status.busy": "2024-08-28T17:27:42.109628Z", + "iopub.status.idle": "2024-08-28T17:27:42.763799Z", + "shell.execute_reply": "2024-08-28T17:27:42.763158Z" + } + }, + "outputs": [ + { + "data": { + 
"text/plain": [ + "Timedelta('-31 days +03:09:02')" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "(pd.to_datetime(\"2018-8-26 15:09:02\") - pd.to_datetime('2018-09-26 12:00:00'))" ] @@ -368,7 +662,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/tests/notebooks/modin/VisualizingTaxiTripData.ipynb b/tests/notebooks/modin/VisualizingTaxiTripData.ipynb index 6bed3588807..7da0b8c4901 100644 --- a/tests/notebooks/modin/VisualizingTaxiTripData.ipynb +++ b/tests/notebooks/modin/VisualizingTaxiTripData.ipynb @@ -19,14 +19,20 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "id": "a48f71cf5ca0ead", "metadata": { "ExecuteTime": { "end_time": "2024-04-26T14:29:18.451575Z", "start_time": "2024-04-26T14:29:18.439220Z" }, - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:35.535431Z", + "iopub.status.busy": "2024-08-28T17:27:35.535097Z", + "iopub.status.idle": "2024-08-28T17:27:37.129341Z", + "shell.execute_reply": "2024-08-28T17:27:37.128903Z" + } }, "outputs": [], "source": [ @@ -48,10 +54,16 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "id": "691d582a439c5a80", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:37.131387Z", + "iopub.status.busy": "2024-08-28T17:27:37.131228Z", + "iopub.status.idle": "2024-08-28T17:27:41.984160Z", + "shell.execute_reply": "2024-08-28T17:27:41.983698Z" + } }, "outputs": [], "source": [ @@ -60,10 +72,16 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 3, "id": "d80067becc67183e", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:41.986666Z", + "iopub.status.busy": "2024-08-28T17:27:41.986440Z", + 
"iopub.status.idle": "2024-08-28T17:27:42.229686Z", + "shell.execute_reply": "2024-08-28T17:27:42.229248Z" + } }, "outputs": [], "source": [ @@ -83,15 +101,21 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "id": "6fb5a3f01b2b0f26", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:42.232608Z", + "iopub.status.busy": "2024-08-28T17:27:42.232433Z", + "iopub.status.idle": "2024-08-28T17:27:43.149053Z", + "shell.execute_reply": "2024-08-28T17:27:43.148761Z" + } }, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -135,10 +159,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "id": "f71c3e100b98fbf2", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:43.152387Z", + "iopub.status.busy": "2024-08-28T17:27:43.152260Z", + "iopub.status.idle": "2024-08-28T17:27:43.849823Z", + "shell.execute_reply": "2024-08-28T17:27:43.849504Z" + } }, "outputs": [ { @@ -356,7 +386,7 @@ "6 485 485 485 485 " ] }, - "execution_count": 9, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -367,10 +397,16 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "id": "8bc9d6db83a8deab", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:43.851746Z", + "iopub.status.busy": "2024-08-28T17:27:43.851616Z", + "iopub.status.idle": "2024-08-28T17:27:44.313330Z", + "shell.execute_reply": "2024-08-28T17:27:44.313030Z" + } }, "outputs": [ { @@ -386,7 +422,7 @@ "Name: VENDORID, dtype: int64" ] }, - "execution_count": 10, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -398,15 +434,31 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "id": "5e87fd5eee32716b", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:44.315144Z", + "iopub.status.busy": "2024-08-28T17:27:44.315017Z", + "iopub.status.idle": "2024-08-28T17:27:45.402962Z", + "shell.execute_reply": "2024-08-28T17:27:45.402634Z" + } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:Calling __array__ on a modin object materializes all data into local memory.\n", + "Since this can be called by 3rd party libraries silently, it can lead to \n", + "unexpected delays or high memory usage. 
Use to_pandas() or to_numpy() to do \n", + "this once explicitly.\n" + ] + }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -441,12 +493,26 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "id": "c3b3cbdfec3a8c7", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:45.404941Z", + "iopub.status.busy": "2024-08-28T17:27:45.404803Z", + "iopub.status.idle": "2024-08-28T17:27:50.361662Z", + "shell.execute_reply": "2024-08-28T17:27:50.361336Z" + } }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING:snowflake.snowpark.modin.plugin.utils.warning_message:`to_datetime` implementation has mismatches with pandas:\n", + "Snowpark pandas to_datetime uses Snowflake's automatic format detection to convert string to datetime when a format is not provided. In this case Snowflake's auto format may yield different result values compared to pandas..\n" + ] + }, { "data": { "text/plain": [ @@ -458,7 +524,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -494,10 +560,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "id": "8349334c3a180ee9", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:50.364258Z", + "iopub.status.busy": "2024-08-28T17:27:50.364103Z", + "iopub.status.idle": "2024-08-28T17:27:55.090457Z", + "shell.execute_reply": "2024-08-28T17:27:55.090158Z" + } }, "outputs": [ { @@ -511,7 +583,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -551,10 +623,16 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "id": "1c06746a382ca255", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:55.092288Z", + "iopub.status.busy": "2024-08-28T17:27:55.092162Z", + "iopub.status.idle": "2024-08-28T17:27:55.295492Z", + "shell.execute_reply": "2024-08-28T17:27:55.295063Z" + } }, "outputs": [], "source": [ @@ -563,15 +641,21 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "id": "e9decc93d86484b8", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:27:55.298084Z", + "iopub.status.busy": "2024-08-28T17:27:55.297856Z", + "iopub.status.idle": "2024-08-28T17:28:02.704856Z", + "shell.execute_reply": "2024-08-28T17:28:02.704559Z" + } }, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -623,10 +707,16 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "id": "324ce2b2a505896b", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:28:02.706815Z", + "iopub.status.busy": "2024-08-28T17:28:02.706678Z", + "iopub.status.idle": "2024-08-28T17:28:02.827733Z", + "shell.execute_reply": "2024-08-28T17:28:02.827310Z" + } }, "outputs": [], "source": [ @@ -638,15 +728,21 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 13, "id": "b8bcfad9f2595785", "metadata": { - "collapsed": false + "collapsed": false, + "execution": { + "iopub.execute_input": "2024-08-28T17:28:02.829840Z", + "iopub.status.busy": "2024-08-28T17:28:02.829693Z", + "iopub.status.idle": "2024-08-28T17:28:07.543554Z", + "shell.execute_reply": "2024-08-28T17:28:07.542916Z" + } }, "outputs": [ { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -706,7 +802,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.9.18" } }, "nbformat": 4, diff --git a/tests/unit/modin/test_snowpark_pandas_types.py b/tests/unit/modin/test_snowpark_pandas_types.py index 36d64d164c8..754f031c1a6 100644 --- a/tests/unit/modin/test_snowpark_pandas_types.py +++ b/tests/unit/modin/test_snowpark_pandas_types.py @@ -14,6 +14,7 @@ SnowparkPandasType, TimedeltaType, ) +from snowflake.snowpark.types import LongType def test_timedelta_type_is_immutable(): @@ -68,3 +69,9 @@ def test_get_snowpark_pandas_type_for_pandas_type(pandas_obj, snowpark_pandas_ty ) def test_TimedeltaType_from_pandas(timedelta, snowpark_pandas_value): assert TimedeltaType.from_pandas(timedelta) == snowpark_pandas_value + + +def test_equals(): + assert TimedeltaType() == TimedeltaType() + assert TimedeltaType() != LongType() + assert LongType() != TimedeltaType() diff --git a/tests/unit/test_session.py b/tests/unit/test_session.py index 86c8d54f7bb..262c9e82c44 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/test_session.py @@ -245,7 +245,9 @@ def run_query(sql: str): ) -def test_resolve_packages_no_side_effect(): +def test_resolve_packages_side_effect(): + """Python stored procedure depends on this behavior to add packages to the session.""" + def mock_get_information_schema_packages(table_name: str): result = MagicMock() result.filter().group_by().agg()._internal_collect_with_tag.return_value = [ @@ -261,15 +263,19 @@ def mock_get_information_schema_packages(table_name: str): existing_packages = {} - resolved_packages, _ = session._resolve_packages( + resolved_packages = session._resolve_packages( ["random_package_name"], existing_packages_dict=existing_packages, validate_package=True, include_pandas=False, ) - assert len(resolved_packages) == 2 # random_package_name and cloudpickle - assert len(existing_packages) == 0 + assert ( + len(resolved_packages) == 2 + ), resolved_packages 
# random_package_name and cloudpickle + assert ( + len(existing_packages) == 1 + ), existing_packages # {"random_package_name": "random_package_name"} @pytest.mark.skipif(not is_pandas_available, reason="requires pandas for write_pandas")