diff --git a/CHANGELOG.md b/CHANGELOG.md index f56961d9f81..3f3ccb77480 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -61,6 +61,7 @@ - Fixed a bug where the truncate mode in `DataFrameWriter.save_as_table` incorrectly handled DataFrames containing only a subset of columns from the existing table. - Fixed a bug where function `to_timestamp` does not set the default timezone of the column datatype. +- Fixed a bug where joins on dataframes with shared lineage would cause a KeyError exception. ### Snowpark pandas API Updates diff --git a/src/snowflake/snowpark/mock/_plan.py b/src/snowflake/snowpark/mock/_plan.py index 11e54802eea..754defe1425 100644 --- a/src/snowflake/snowpark/mock/_plan.py +++ b/src/snowflake/snowpark/mock/_plan.py @@ -1031,16 +1031,10 @@ def aggregate_by_groups(cur_group: TableEmulator): ] result_df = result_df[reordered_cols] - common_columns = set(L_expr_to_alias.keys()).intersection( - R_expr_to_alias.keys() - ) + # If both sides have a column with shared lineage the left side alias is preferred new_expr_to_alias = { - k: v - for k, v in { - **L_expr_to_alias, - **R_expr_to_alias, - }.items() - if k not in common_columns + **L_expr_to_alias, + **R_expr_to_alias, } expr_to_alias.update(new_expr_to_alias) diff --git a/src/snowflake/snowpark/mock/_select_statement.py b/src/snowflake/snowpark/mock/_select_statement.py index bd0ecd906b0..af3aca7fd1b 100644 --- a/src/snowflake/snowpark/mock/_select_statement.py +++ b/src/snowflake/snowpark/mock/_select_statement.py @@ -311,7 +311,7 @@ def select(self, cols: List[Expression]) -> "SelectStatement": ColumnChangeState.NEW, ): can_be_flattened = can_projection_dependent_columns_be_flattened( - dependent_columns, subquery_column_states + dependent_columns, new_column_states ) if not can_be_flattened: break diff --git a/tests/integ/scala/test_dataframe_join_suite.py b/tests/integ/scala/test_dataframe_join_suite.py index 5f92c7f887c..bbc4ea0ccf5 100644 --- a/tests/integ/scala/test_dataframe_join_suite.py +++ 
b/tests/integ/scala/test_dataframe_join_suite.py @@ -1476,10 +1476,6 @@ def test_nested_join_diamond_shape_workaround(session): Utils.check_answer(df5, [Row(1, 1)]) -@pytest.mark.skipif( - "config.getoption('local_testing_mode', default=False)", - reason="SNOW-1373887: Support basic diamond shaped joins in Local Testing", -) def test_dataframe_basic_diamond_shaped_join(session): df1 = session.create_dataframe([[1, 2], [3, 4], [5, 6]], schema=["a", "b"]) df2 = df1.filter(col("a") > 1).with_column("c", lit(7)) diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 0576198c03c..dff4db705de 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -4057,10 +4057,6 @@ def test_select_star_select_columns(session): Utils.check_answer(df3, [Row(1, 2)]) -@pytest.mark.skipif( - "config.getoption('local_testing_mode', default=False)", - reason="SNOW-1373887 Basic diamond shaped joins are not supported", -) def test_select_star_join(session): df = session.create_dataframe([[1, 2]], schema=["a", "b"]) df_star = df.select("*")