From 86f7412325511975d9af87dd70b7719d7aff6a81 Mon Sep 17 00:00:00 2001 From: cindykrafft Date: Wed, 18 Oct 2023 16:59:28 -0700 Subject: [PATCH 1/3] SNOW-853533: Support case sensitivity in to_local_iterator --- src/snowflake/snowpark/dataframe.py | 12 ++++++-- tests/integ/test_dataframe.py | 44 +++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 3 deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index dea1d5df060..a5474d3fa4d 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -663,19 +663,22 @@ def _execute_and_get_query_id( @overload def to_local_iterator( - self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True + self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True, + case_sensitive: bool = True, ) -> Iterator[Row]: ... # pragma: no cover @overload def to_local_iterator( - self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = False + self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = False, + case_sensitive: bool = True, ) -> AsyncJob: ... # pragma: no cover @df_collect_api_telemetry def to_local_iterator( - self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True + self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True, + case_sensitive: bool = True, ) -> Union[Iterator[Row], AsyncJob]: """Executes the query representing this DataFrame and returns an iterator of :class:`Row` objects that you can use to retrieve the results. @@ -696,6 +699,8 @@ def to_local_iterator( block: A bool value indicating whether this function will wait until the result is available. When it is ``False``, this function executes the underlying queries of the dataframe asynchronously and returns an :class:`AsyncJob`. + case_sensitive: A bool value which is controls the case sensitivity of the fields in the + :class:`Row` objects returned by the ``to_local_iterator``. Defaults to ``True``. """ return self._session._conn.execute( self._plan, @@ -707,6 +712,7 @@ def to_local_iterator( self._session.query_tag, SKIP_LEVELS_THREE, ), + case_sensitive=case_sensitive ) def __copy__(self) -> "DataFrame": diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 7c098dac3f9..6ac9e980874 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -1956,6 +1956,50 @@ def test_case_insensitive_collect(session): assert row["p@$$w0rd"] == "test" assert row["P@$$W0RD"] == "test" +def test_case_insensitive_local_iterator(session): + df = session.create_dataframe( + [["Gordon", 153]], schema=["firstname", "matches_won"] + ) + df_quote = session.create_dataframe( + [["Gordon", 153]], schema=["'quotedName'", "quoted-won"] + ) + + # tests for sync collect + row = list(df.to_local_iterator(case_sensitive=False))[0] + assert row.firstName == "Gordon" + assert row.FIRSTNAME == "Gordon" + assert row.FiRstNamE == "Gordon" + assert row["firstname"] == "Gordon" + assert row["FIRSTNAME"] == "Gordon" + assert row["FirstName"] == "Gordon" + + assert row.matches_won == 153 + assert row.MATCHES_WON == 153 + assert row.MaTchEs_WoN == 153 + assert row["matches_won"] == 153 + assert row["Matches_Won"] == 153 + assert row["MATCHES_WON"] == 153 + + with pytest.raises( + ValueError, + match="Case insensitive fields is not supported in presence of quoted columns", + ): + row = list(df_quote.to_local_iterator(case_sensitive=False))[0] + + # special character tests + df_login = session.create_dataframe( + [["admin", "test"], ["snowman", "test"]], schema=["username", "p@$$w0rD"] + ) + row = list(df_login.to_local_iterator(case_sensitive=False))[0] + + assert row.username == "admin" + assert row.UserName == "admin" + assert row.usErName == "admin" + + assert row["p@$$w0rD"] == "test" + assert row["p@$$w0rd"] == "test" + assert row["P@$$W0RD"] == "test" + def test_dropna(session): Utils.check_answer(TestData.double3(session).dropna(), [Row(1.0, 1)]) From 74f07c04f3d64e2c0f1036fdf9a091b914f2d500 Mon Sep 17 00:00:00 2001 From: cindykrafft Date: Thu, 19 Oct 2023 13:30:15 -0700 Subject: [PATCH 2/3] Response to code review comments --- src/snowflake/snowpark/dataframe.py | 4 ++-- tests/integ/test_dataframe.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index a5474d3fa4d..ce4b64f5e37 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -577,7 +577,7 @@ def collect( block: A bool value indicating whether this function will wait until the result is available. When it is ``False``, this function executes the underlying queries of the dataframe asynchronously and returns an :class:`AsyncJob`. - case_sensitive: A bool value which is controls the case sensitivity of the fields in the + case_sensitive: A bool value which controls the case sensitivity of the fields in the :class:`Row` objects returned by the ``collect``. Defaults to ``True``. See also: @@ -699,7 +699,7 @@ def to_local_iterator( block: A bool value indicating whether this function will wait until the result is available. When it is ``False``, this function executes the underlying queries of the dataframe asynchronously and returns an :class:`AsyncJob`. - case_sensitive: A bool value which is controls the case sensitivity of the fields in the + case_sensitive: A bool value which controls the case sensitivity of the fields in the :class:`Row` objects returned by the ``to_local_iterator``. Defaults to ``True``. """ return self._session._conn.execute( diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 6ac9e980874..2165f276354 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -1965,7 +1965,7 @@ def test_case_insensitive_local_iterator(session): ) # tests for sync collect - row = list(df.to_local_iterator(case_sensitive=False))[0] + row = next(df.to_local_iterator(case_sensitive=False)) assert row.firstName == "Gordon" assert row.FIRSTNAME == "Gordon" assert row.FiRstNamE == "Gordon" @@ -1984,13 +1984,13 @@ def test_case_insensitive_local_iterator(session): ValueError, match="Case insensitive fields is not supported in presence of quoted columns", ): - row = list(df_quote.to_local_iterator(case_sensitive=False))[0] + next(df_quote.to_local_iterator(case_sensitive=False)) # special character tests df_login = session.create_dataframe( [["admin", "test"], ["snowman", "test"]], schema=["username", "p@$$w0rD"] ) - row = list(df_login.to_local_iterator(case_sensitive=False))[0] + row = next(df_login.to_local_iterator(case_sensitive=False)) assert row.username == "admin" assert row.UserName == "admin" From 5f72bc9d37ed2df320069f7fec874c00aa8ee261 Mon Sep 17 00:00:00 2001 From: cindykrafft Date: Sun, 22 Oct 2023 13:19:30 -0700 Subject: [PATCH 3/3] Update changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a3151b1144e..d7fbbc8ae52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## 1.10.0 (TBD) +### New Features +- Added support for managing case sensitivity in `DataFrame.to_local_iterator()`. + ### Behavior change - Changed the behavior of `date_format`: