Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1617349: Add aliases for format options to dataframe reader and writer #2155

19 changes: 19 additions & 0 deletions src/snowflake/snowpark/_internal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,25 @@ def get_copy_into_table_options(
return file_format_type_options, copy_options


def get_aliased_option_name(
    key: str,
    alias_map: Dict[str, str],
) -> str:
    """Return the canonical option name for ``key``.

    The key is normalized (stripped of surrounding whitespace and
    upper-cased) and looked up in ``alias_map``. When an alias match is
    found, the canonical name is returned and a warning is logged so the
    user knows the option was translated; otherwise the normalized key is
    returned unchanged.

    Args:
        key: Option name supplied by the caller (case-insensitive).
        alias_map: Mapping from upper-cased alias names to canonical
            option names.

    Returns:
        The canonical, upper-cased option name.
    """
    upper_key = key.strip().upper()
    aliased_key = alias_map.get(upper_key, upper_key)
    if aliased_key != upper_key:
        # Lazy %-style args: the message is only formatted if the warning
        # is actually emitted (logging best practice for hot paths).
        logger.warning(
            "Option '%s' is aliased to '%s'. You may see unexpected behavior."
            " Please refer to format specific options for more information",
            key,
            aliased_key,
        )

    return aliased_key


def strip_double_quotes_in_like_statement_in_table_name(table_name: str) -> str:
"""
this function is used by method _table_exists to handle double quotes in table name when calling
Expand Down
17 changes: 16 additions & 1 deletion src/snowflake/snowpark/dataframe_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from snowflake.snowpark._internal.utils import (
INFER_SCHEMA_FORMAT_TYPES,
TempObjectType,
get_aliased_option_name,
get_copy_into_table_options,
random_name_for_temp_object,
)
Expand All @@ -43,6 +44,19 @@
logger = getLogger(__name__)

LOCAL_TESTING_SUPPORTED_FILE_FORMAT = ("JSON",)
# Alternative spellings for reader options (presumably Spark-style names —
# confirm against the aliasing design doc), mapped to the canonical
# Snowflake option names. Keys must be upper-cased: DataFrameReader.option
# normalizes user input via get_aliased_option_name, which upper-cases the
# key before looking it up here. Note that several aliases (e.g. DELIMITER
# and SEP) intentionally map to the same canonical option.
READER_OPTIONS_ALIAS_MAP = {
    "DELIMITER": "FIELD_DELIMITER",
    "HEADER": "PARSE_HEADER",
    "PATHGLOBFILTER": "PATTERN",
    "FILENAMEPATTERN": "PATTERN",
    "INFERSCHEMA": "INFER_SCHEMA",
    "SEP": "FIELD_DELIMITER",
    "LINESEP": "RECORD_DELIMITER",
    "QUOTE": "FIELD_OPTIONALLY_ENCLOSED_BY",
    "NULLVALUE": "NULL_IF",
    "DATEFORMAT": "DATE_FORMAT",
    "TIMESTAMPFORMAT": "TIMESTAMP_FORMAT",
}


class DataFrameReader:
Expand Down Expand Up @@ -569,7 +583,8 @@ def option(self, key: str, value: Any) -> "DataFrameReader":
key: Name of the option (e.g. ``compression``, ``skip_header``, etc.).
value: Value of the option.
"""
self._cur_options[key.upper()] = value
aliased_key = get_aliased_option_name(key, READER_OPTIONS_ALIAS_MAP)
self._cur_options[aliased_key] = value
return self

def options(self, configs: Dict) -> "DataFrameReader":
Expand Down
21 changes: 20 additions & 1 deletion src/snowflake/snowpark/dataframe_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from snowflake.snowpark._internal.type_utils import ColumnOrName, ColumnOrSqlExpr
from snowflake.snowpark._internal.utils import (
SUPPORTED_TABLE_TYPES,
get_aliased_option_name,
normalize_remote_file_or_dir,
parse_table_name,
str_to_enum,
Expand All @@ -39,6 +40,15 @@
else:
from collections.abc import Iterable

# Alternative spellings for writer format options, mapped to the canonical
# Snowflake option names. Keys must be upper-cased: copy_into_location
# translates each format_type_options key via get_aliased_option_name,
# which upper-cases the key before looking it up here. Kept as a subset of
# the reader aliases — only options meaningful when unloading data.
WRITER_OPTIONS_ALIAS_MAP = {
    "SEP": "FIELD_DELIMITER",
    "LINESEP": "RECORD_DELIMITER",
    "QUOTE": "FIELD_OPTIONALLY_ENCLOSED_BY",
    "NULLVALUE": "NULL_IF",
    "DATEFORMAT": "DATE_FORMAT",
    "TIMESTAMPFORMAT": "TIMESTAMP_FORMAT",
}


class DataFrameWriter:
"""Provides methods for writing data from a :class:`DataFrame` to supported output destinations.
Expand Down Expand Up @@ -352,14 +362,23 @@ def copy_into_location(
raise TypeError( # pragma: no cover
f"'partition_by' is expected to be a column name, a Column object, or a sql expression. Got type {type(partition_by)}"
)

# apply writer option alias mapping
format_type_aliased_options = None
if format_type_options:
format_type_aliased_options = {}
for key, value in format_type_options.items():
aliased_key = get_aliased_option_name(key, WRITER_OPTIONS_ALIAS_MAP)
format_type_aliased_options[aliased_key] = value

df = self._dataframe._with_plan(
CopyIntoLocationNode(
self._dataframe._plan,
stage_location,
partition_by=partition_by,
file_format_name=file_format_name,
file_format_type=file_format_type,
format_type_options=format_type_options,
format_type_options=format_type_aliased_options,
copy_options=copy_options,
header=header,
)
Expand Down
14 changes: 14 additions & 0 deletions tests/integ/scala/test_dataframe_reader_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
METADATA_FILENAME,
METADATA_START_SCAN_TIME,
)
from snowflake.snowpark.dataframe_reader import READER_OPTIONS_ALIAS_MAP
from snowflake.snowpark.exceptions import (
SnowparkDataframeReaderException,
SnowparkPlanException,
Expand Down Expand Up @@ -440,6 +441,19 @@ def mock_run_query(*args, **kwargs):
assert "Could not infer csv schema due to exception:" in caplog.text


@pytest.mark.parametrize("mode", ["select", "copy"])
def test_reader_option_aliases(session, mode, caplog):
    """Setting any aliased option should log a warning naming the canonical option."""
    reader = get_reader(session, mode)
    with caplog.at_level(logging.WARN):
        for alias, canonical in READER_OPTIONS_ALIAS_MAP.items():
            reader.option(alias, "test")
            expected_warning = (
                f"Option '{alias}' is aliased to '{canonical}'. "
                "You may see unexpected behavior"
            )
            assert expected_warning in caplog.text
            caplog.clear()

@pytest.mark.parametrize("mode", ["select", "copy"])
def test_read_csv_incorrect_schema(session, mode):
reader = get_reader(session, mode)
Expand Down
29 changes: 28 additions & 1 deletion tests/integ/scala/test_dataframe_writer_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#

import copy
import logging

import pytest

Expand Down Expand Up @@ -535,7 +536,7 @@ def create_and_append_check_answer(table_name_input):
"config.getoption('local_testing_mode', default=False)",
reason="BUG: SNOW-1235716 should raise not implemented error not AttributeError: 'MockExecutionPlan' object has no attribute 'replace_repeated_subquery_with_cte'",
)
def test_writer_csv(session, tmpdir_factory):
def test_writer_csv(session, caplog):

"""Tests for df.write.csv()."""
df = session.create_dataframe([[1, 2], [3, 4], [5, 6], [3, 7]], schema=["a", "b"])
Expand Down Expand Up @@ -590,6 +591,32 @@ def test_writer_csv(session, tmpdir_factory):
assert result6[0].rows_unloaded == ROWS_COUNT
data6 = session.read.schema(schema).csv(f"@{path6}")
Utils.assert_rows_count(data6, ROWS_COUNT)

# test option alias case
path7 = f"{temp_stage}/test_csv_example7/my_file.csv.gz"
with caplog.at_level(logging.WARNING):
result7 = df.write.csv(
path7,
format_type_options={"SEP": ":", "quote": '"'},
single=True,
header=True,
)
assert "Option 'SEP' is aliased to 'FIELD_DELIMITER'." in caplog.text
assert (
"Option 'quote' is aliased to 'FIELD_OPTIONALLY_ENCLOSED_BY'."
in caplog.text
)

assert result7[0].rows_unloaded == ROWS_COUNT
data7 = (
session.read.schema(schema)
.option("header", True)
.option("inferSchema", True)
.option("SEP", ":")
.option("quote", '"')
.csv(f"@{path7}")
)
Utils.check_answer(data7, df)
finally:
Utils.drop_stage(session, temp_stage)

Expand Down
Loading