From d48a75d1fd928ecfb3ac149941d11f9a3c5d6ed1 Mon Sep 17 00:00:00 2001 From: Sophie Tan Date: Wed, 29 May 2024 16:00:20 -0700 Subject: [PATCH 1/3] Add changes --- CHANGELOG.md | 2 ++ src/snowflake/snowpark/mock/_plan.py | 24 +++++++++++------------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b887aafce07..bf18db4cd62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,8 @@ - Fixed a bug where creating DataFrame with empty data of type `DateType` raises `AttributeError`. - Fixed a bug that table merge fails when update clause exists but no update takes place. - Fixed a bug in mock implementation of `to_char` that raises `IndexError` when incoming column has inconsecutive row index. +- Fixed a bug in handling of `CaseExpr` expressions that raises `IndexError` when incoming column has inconsecutive row index. +- Fixed a bug in implementation of `Column.like` that raises `IndexError` when incoming column has inconsecutive row index. #### Improvements diff --git a/src/snowflake/snowpark/mock/_plan.py b/src/snowflake/snowpark/mock/_plan.py index e88edca5fa3..d2503b80e21 100644 --- a/src/snowflake/snowpark/mock/_plan.py +++ b/src/snowflake/snowpark/mock/_plan.py @@ -1643,18 +1643,16 @@ def calculate_expression( child_column = calculate_expression( exp.child, input_data, analyzer, expr_to_alias ) - return ColumnEmulator( - data=[bool(data is None) for data in child_column], - sf_type=ColumnType(BooleanType(), True), - ) + res = child_column.apply(lambda x: bool(x is None)) + res.sf_type = ColumnType(BooleanType(), True) + return res if isinstance(exp, IsNotNull): child_column = calculate_expression( exp.child, input_data, analyzer, expr_to_alias ) - return ColumnEmulator( - data=[bool(data is not None) for data in child_column], - sf_type=ColumnType(BooleanType(), True), - ) + res = child_column.apply(lambda x: bool(x is not None)) + res.sf_type = ColumnType(BooleanType(), True) + return res if isinstance(exp, IsNaN): child_column = calculate_expression( exp.child, input_data, analyzer, expr_to_alias @@ -1813,11 +1811,12 @@ def _match_pattern(row) -> bool: return result if isinstance(exp, Like): lhs = calculate_expression(exp.expr, input_data, analyzer, expr_to_alias) + pattern = convert_wildcard_to_regex( str( - calculate_expression(exp.pattern, input_data, analyzer, expr_to_alias)[ - 0 - ] + calculate_expression( + exp.pattern, input_data, analyzer, expr_to_alias + ).iloc[0] ) ) result = lhs.str.match(pattern) @@ -1868,8 +1867,7 @@ def _match_pattern(row) -> bool: return res if isinstance(exp, CaseWhen): remaining = input_data - output_data = ColumnEmulator([None] * len(input_data)) - output_data.sf_type = None + output_data = ColumnEmulator([None] * len(input_data), index=input_data.index) for case in exp.branches: condition = calculate_expression( case[0], input_data, analyzer, expr_to_alias From ff6d57b3ecb4f6fb90d32ea252ece41ce235bc9e Mon Sep 17 00:00:00 2001 From: Sophie Tan Date: Thu, 6 Jun 2024 14:27:57 -0700 Subject: [PATCH 2/3] Add tests --- tests/mock/test_column.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 tests/mock/test_column.py diff --git a/tests/mock/test_column.py b/tests/mock/test_column.py new file mode 100644 index 00000000000..ee9ec5c9885 --- /dev/null +++ b/tests/mock/test_column.py @@ -0,0 +1,20 @@ +# +# Copyright (c) 2012-2024 Snowflake Computing Inc. All rights reserved. +# + +from snowflake.snowpark.functions import col, when +from snowflake.snowpark.row import Row + + +def test_casewhen_with_non_zero_row_index(session): + df = session.create_dataframe([[1, 2], [3, 4]], schema=["a", "b"]) + assert df.filter(col("a") > 1).select( + when(col("a").is_null(), 5).when(col("a") == 1, 6).otherwise(7).as_("a") + ).collect() == [Row(A=7)] + + +def test_regexp_with_non_zero_row_index(session): + df = session.create_dataframe([["1", 2], ["3", 4]], schema=["a", "b"]) + assert df.filter(col("b") > 2).select( + col("a").like("1").alias("res") + ).collect() == [Row(RES=False)] From e6fe7ffc5c7ab787dca835b76fe2bfcf1b9b49a2 Mon Sep 17 00:00:00 2001 From: Sophie Tan Date: Wed, 12 Jun 2024 14:23:28 -0700 Subject: [PATCH 3/3] Address comments --- CHANGELOG.md | 6 +++--- tests/mock/test_column.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf18db4cd62..bf440b675ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,9 +27,9 @@ - Fixed a bug in convert_timezone that made the setting the source_timezone parameter return an error. - Fixed a bug where creating DataFrame with empty data of type `DateType` raises `AttributeError`. - Fixed a bug that table merge fails when update clause exists but no update takes place. -- Fixed a bug in mock implementation of `to_char` that raises `IndexError` when incoming column has inconsecutive row index. -- Fixed a bug in handling of `CaseExpr` expressions that raises `IndexError` when incoming column has inconsecutive row index. -- Fixed a bug in implementation of `Column.like` that raises `IndexError` when incoming column has inconsecutive row index. +- Fixed a bug in mock implementation of `to_char` that raises `IndexError` when incoming column has nonconsecutive row index. +- Fixed a bug in handling of `CaseExpr` expressions that raises `IndexError` when incoming column has nonconsecutive row index. +- Fixed a bug in implementation of `Column.like` that raises `IndexError` when incoming column has nonconsecutive row index. #### Improvements diff --git a/tests/mock/test_column.py b/tests/mock/test_column.py index ee9ec5c9885..9a2e27f37ef 100644 --- a/tests/mock/test_column.py +++ b/tests/mock/test_column.py @@ -13,7 +13,7 @@ def test_casewhen_with_non_zero_row_index(session): ).collect() == [Row(A=7)] -def test_regexp_with_non_zero_row_index(session): +def test_like_with_non_zero_row_index(session): df = session.create_dataframe([["1", 2], ["3", 4]], schema=["a", "b"]) assert df.filter(col("b") > 2).select( col("a").like("1").alias("res")