diff --git a/CHANGELOG.md b/CHANGELOG.md index 80a9fa5d519..0314eb42006 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,13 @@ ## 1.10.0 (TBD) +### New Features +- Added support for managing case sensitivity in `DataFrame.to_local_iterator()`. + ### Bug Fixes - Fixed a bug in `DataFrame.to_pandas()` where converting snowpark dataframes to pandas dataframes was losing precision on integers with more than 19 digits. +- Fixed a bug where `session.add_packages` could not handle requirement specifiers that contain a project name with an underscore and a version. ### Behavior change @@ -12,10 +16,6 @@ - The `format` argument changed from optional to required. - The returned result changed from a date object to a date-formatted string. -### Bug Fixes - -- Fixed a bug that `session.add_packages` can not handle requirement specifier that contains project name with underscore and version. - ## 1.9.0 (2023-10-13) ### New Features diff --git a/src/snowflake/snowpark/dataframe.py b/src/snowflake/snowpark/dataframe.py index dea1d5df060..ce4b64f5e37 100644 --- a/src/snowflake/snowpark/dataframe.py +++ b/src/snowflake/snowpark/dataframe.py @@ -577,7 +577,7 @@ def collect( block: A bool value indicating whether this function will wait until the result is available. When it is ``False``, this function executes the underlying queries of the dataframe asynchronously and returns an :class:`AsyncJob`. - case_sensitive: A bool value which is controls the case sensitivity of the fields in the + case_sensitive: A bool value which controls the case sensitivity of the fields in the :class:`Row` objects returned by the ``collect``. Defaults to ``True``. See also: @@ -663,19 +663,22 @@ def _execute_and_get_query_id( @overload def to_local_iterator( - self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True + self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True, + case_sensitive: bool = True, ) -> Iterator[Row]: ... 
# pragma: no cover @overload def to_local_iterator( - self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = False + self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = False, + case_sensitive: bool = True, ) -> AsyncJob: ... # pragma: no cover @df_collect_api_telemetry def to_local_iterator( - self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True + self, *, statement_params: Optional[Dict[str, str]] = None, block: bool = True, + case_sensitive: bool = True, ) -> Union[Iterator[Row], AsyncJob]: """Executes the query representing this DataFrame and returns an iterator of :class:`Row` objects that you can use to retrieve the results. @@ -696,6 +699,8 @@ def to_local_iterator( block: A bool value indicating whether this function will wait until the result is available. When it is ``False``, this function executes the underlying queries of the dataframe asynchronously and returns an :class:`AsyncJob`. + case_sensitive: A bool value which controls the case sensitivity of the fields in the + :class:`Row` objects returned by the ``to_local_iterator``. Defaults to ``True``. 
""" return self._session._conn.execute( self._plan, @@ -707,6 +712,7 @@ def to_local_iterator( self._session.query_tag, SKIP_LEVELS_THREE, ), + case_sensitive=case_sensitive ) def __copy__(self) -> "DataFrame": diff --git a/tests/integ/test_dataframe.py b/tests/integ/test_dataframe.py index 7c098dac3f9..2165f276354 100644 --- a/tests/integ/test_dataframe.py +++ b/tests/integ/test_dataframe.py @@ -1956,6 +1956,50 @@ def test_case_insensitive_collect(session): assert row["p@$$w0rd"] == "test" assert row["P@$$W0RD"] == "test" +def test_case_insensitive_local_iterator(session): + df = session.create_dataframe( + [["Gordon", 153]], schema=["firstname", "matches_won"] + ) + df_quote = session.create_dataframe( + [["Gordon", 153]], schema=["'quotedName'", "quoted-won"] + ) + + # tests for sync to_local_iterator + row = next(df.to_local_iterator(case_sensitive=False)) + assert row.firstName == "Gordon" + assert row.FIRSTNAME == "Gordon" + assert row.FiRstNamE == "Gordon" + assert row["firstname"] == "Gordon" + assert row["FIRSTNAME"] == "Gordon" + assert row["FirstName"] == "Gordon" + + assert row.matches_won == 153 + assert row.MATCHES_WON == 153 + assert row.MaTchEs_WoN == 153 + assert row["matches_won"] == 153 + assert row["Matches_Won"] == 153 + assert row["MATCHES_WON"] == 153 + + with pytest.raises( + ValueError, + match="Case insensitive fields is not supported in presence of quoted columns", + ): + next(df_quote.to_local_iterator(case_sensitive=False)) + + # special character tests + df_login = session.create_dataframe( + [["admin", "test"], ["snowman", "test"]], schema=["username", "p@$$w0rD"] + ) + row = next(df_login.to_local_iterator(case_sensitive=False)) + + assert row.username == "admin" + assert row.UserName == "admin" + assert row.usErName == "admin" + + assert row["p@$$w0rD"] == "test" + assert row["p@$$w0rd"] == "test" + assert row["P@$$W0RD"] == "test" + def test_dropna(session): Utils.check_answer(TestData.double3(session).dropna(), [Row(1.0, 1)])